diff --git a/.qdrant-initialized b/.qdrant-initialized new file mode 100644 index 0000000..e69de29 diff --git a/.serena/project.yml b/.serena/project.yml index 51bb4ad..8d7121f 100644 --- a/.serena/project.yml +++ b/.serena/project.yml @@ -2,18 +2,49 @@ project_name: "factorylm" -# list of languages for which language servers are started +# list of languages for which language servers are started; choose from: +# al angular ansible bash clojure +# cpp cpp_ccls crystal csharp csharp_omnisharp +# dart elixir elm erlang fortran +# fsharp go groovy haskell haxe +# hlsl html java json julia +# kotlin lean4 lua luau markdown +# matlab msl nix ocaml pascal +# perl php php_phpactor powershell python +# python_jedi python_ty r rego ruby +# ruby_solargraph rust scala scss solidity +# svelte swift systemverilog terraform toml +# typescript typescript_vts vue yaml zig +# (This list may be outdated. For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Angular projects, use angular (subsumes typescript+html; requires `npm install` in the project root) +# - For Svelte projects, use svelte (subsumes typescript/javascript for .svelte projects; requires npm) +# - For SCSS / Sass / plain CSS, use scss (some-sass-language-server handles all three) +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# Some languages require additional setup/installations. +# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. languages: - python - typescript # the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings encoding: utf-8 # whether to use project's .gitignore files to ignore files ignore_all_files_in_gitignore: true # list of additional paths to ignore in this project. +# Same syntax as gitignore, so you can use * and **. +# Note: global ignored_paths from serena_config.yml are also applied additively. ignored_paths: - "node_modules/**" - "_BUILDS/**" @@ -28,18 +59,27 @@ ignored_paths: - "docs/archive/**" # whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 read_only: false # list of tool names to exclude. +# This extends the existing exclusions (e.g. from the global configuration) +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html excluded_tools: [] -# list of tools to include that would otherwise be disabled +# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). +# This extends the existing inclusions (e.g. from the global configuration). +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html included_optional_tools: [] -# fixed set of tools to use as the base tool set +# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. +# This cannot be combined with non-empty excluded_tools or included_optional_tools. +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html fixed_tools: [] -# initial prompt for the project. +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). initial_prompt: > FactoryLM monorepo — industrial AI platform diagnosing factory faults using NVIDIA Cosmos Reason 2. Key dirs: cosmos/ (client, watcher, agent), @@ -47,15 +87,67 @@ initial_prompt: > sim/ (PLC simulator + Factory I/O bridge), core/ (shared Python library). Pipeline: Factory I/O -> Modbus TCP -> Matrix API -> Cosmos -> Diagnosis. SAFETY: NEVER modify code tagged # SAFETY, # PLC, or # CRITICAL. - # list of mode names to that are always to be included in the set of active modes base_modes: -# list of mode names that are to be activated by default. +# list of mode names that are to be activated by default, overriding the setting in the global configuration. +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply +# for this project. +# This setting can, in turn, be overridden by CLI parameters (--mode). +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes default_modes: # time budget (seconds) per tool call for the retrieval of additional symbol information +# such as docstrings or parameter information. +# This overrides the corresponding setting in the global configuration; see the documentation there. +# If null or missing, use the setting from the global configuration. symbol_info_budget: # The language backend to use for this project. +# If not set, the global setting from serena_config.yml is used. +# Valid values: LSP, JetBrains +# Note: the backend is fixed at startup. If a project with a different backend +# is activated post-init, an error will be returned. language_backend: + +# list of mode names to be activated additionally for this project, e.g. ["query-projects"] +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes +added_modes: + +# line ending convention to use when writing source files. +# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) +# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. +line_ending: + +# list of regex patterns which, when matched, mark a memory entry as read‑only. +# Extends the list from the global configuration, merging the two lists. +read_only_memory_patterns: [] + +# list of regex patterns for memories to completely ignore. +# Matching memories will not appear in list_memories or activate_project output +# and cannot be accessed via read_memory or write_memory. +# To access ignored memory files, use the read_file tool on the raw file path. +# Extends the list from the global configuration, merging the two lists. +# Example: ["_archive/.*", "_episodes/.*"] +ignored_memory_patterns: [] + +# advanced configuration option allowing to configure language server-specific options. +# Maps the language key to the options. +# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. +# No documentation on options means no options are available. +ls_specific_settings: {} + +# list of additional workspace folder paths for cross-package reference support (e.g. in monorepos). +# Paths can be absolute or relative to the project root. +# Each folder is registered as an LSP workspace folder, enabling language servers to discover +# symbols and references across package boundaries. +# Currently supported for: TypeScript. +# Example: +# additional_workspace_folders: +# - ../sibling-package +# - ../shared-lib +additional_workspace_folders: [] diff --git a/PLAN.md b/PLAN.md index bf51cca..c996567 100644 --- a/PLAN.md +++ b/PLAN.md @@ -1,86 +1,92 @@ -# PLAN: MES Core — Week 2 (Modbus Machine State Reader) +# PLAN: MES Core — Week 6 (Atlas CMMS Bidirectional Sync) -**Branch:** `feat/mes-week2-state-reader` -**Issue:** Mikecranesync/MIRA#320 -**PRD:** `docs/PRD-MES-CORE.md` -**Date:** 2026-04-15 -**Depends on:** Week 1 (feat/mes-week1-db-schema) merged +**Branch:** `feat/mes-week6-cmms-sync` +**Issue:** Mikecranesync/MIRA#324 +**PRD:** `docs/PRD-MES-CORE.md §3 (ERP/CMMS Integration)` +**Date:** 2026-04-16 +**Depends on:** Weeks 1–5 merged --- ## Objective -Build the machine state reader: a background poller that reads the plc-modbus HTTP API every 5 seconds per configured line, detects state transitions (RUNNING/DOWN/IDLE/OFFLINE), writes them to `machine_states`, and exposes `GET /api/mes/lines` and `GET /api/mes/lines/{id}/state` REST endpoints. +Bidirectional sync between the MES work order system and Atlas CMMS +(implemented as a GitHub Gist-based portable work order format — see +`cmms/gist_work_order.py` for the existing pattern). + +- **MES → CMMS (outbound)**: POST /api/mes/cmms/sync/{id} pushes a WO as a + Gist document (Markdown + CSV) readable by any CMMS. +- **CMMS → MES (inbound)**: POST /api/mes/cmms/ingest accepts a CMMS work order + payload and creates/updates a WO in the MES database. +- `cmms_enabled` config gate: sync is opt-in; tests run with it disabled. ## Affected Files **New:** -- `services/mes/backend/services/__init__.py` -- `services/mes/backend/services/plc_client.py` — async HTTP client wrapping plc-modbus -- `services/mes/backend/services/state_machine.py` — pure state detection from IO snapshot -- `services/mes/backend/services/state_poller.py` — asyncio background poll loop -- `services/mes/backend/routes/lines.py` — GET /api/mes/lines, GET /lines/{id}/state -- `services/mes/tests/test_machine_states.py` — 10 unit tests, all mocked +- `services/mes/alembic/versions/0002_add_cmms_ref.py` — adds cmms_ref + cmms_synced_at to work_orders +- `services/mes/backend/services/cmms_client.py` — CMMS Gist HTTP adapter +- `services/mes/backend/routes/cmms.py` — sync endpoints +- `services/mes/tests/test_cmms.py` — unit tests **Modified:** -- `services/mes/requirements.txt` — add httpx -- `services/mes/backend/config.py` — add plc_modbus_url setting -- `services/mes/backend/main.py` — wire poller into lifespan, add lines router -- `docker-compose.yml` — add PLC_MODBUS_URL env to mes container +- `services/mes/backend/models/db_models.py` — WorkOrder gets cmms_ref, cmms_synced_at +- `services/mes/backend/config.py` — cmms_enabled, cmms_github_token +- `services/mes/backend/main.py` — include cmms router +- `PLAN.md` + +--- ## Approach -1. `plc_client.py` — thin async wrapper around `GET /api/plc/io` (httpx). Raises `PLCOfflineError` on timeout/connection failure so caller can set OFFLINE state. -2. `state_machine.py` — pure function `detect_state(io_data)` → `(MachineStateEnum, reason_code | None)`. Derived from `VFDStatus` and `ErrorCode` registers. No DB or network calls — fully testable without mocks. -3. `state_poller.py` — asyncio task, one iteration per line every 5s. Maintains in-memory cache to avoid DB reads on every tick. Writes to `machine_states` only on transition. -4. `lines.py` routes — two endpoints: list all lines (from DB), get current state (from in-memory cache + last DB row). -5. `main.py` lifespan — start poller task on startup, cancel on shutdown. +### 1. DB migration 0002 -State transition write: close open row (`ended_at = NOW()`), insert new row. +Adds to work_orders: + - `cmms_ref TEXT` — GitHub Gist ID once synced (NULL = not yet pushed) + - `cmms_synced_at TIMESTAMPTZ` — timestamp of last successful push -## State Machine +### 2. CMMS Client (`cmms_client.py`) -``` -IO: VFDStatus=1, ErrorCode=0 → RUNNING -IO: VFDStatus=2 OR ErrorCode>0 → DOWN (reason_code from ErrorCode map) -IO: VFDStatus=0, ErrorCode=0 → IDLE -HTTP failure / timeout → OFFLINE -``` +Sync HTTP adapter using `httpx.Client`. Controlled by `settings.cmms_enabled`. +When disabled, `push_work_order()` returns a mock response — no real API calls. -## ErrorCode → reason_code map +Functions: +- `format_work_order(wo, line_name, product_sku, product_name) -> dict` + Maps MES WO fields to the CMMS Gist metadata schema. +- `push_work_order(metadata, gist_id=None) -> dict` + Creates Gist if gist_id is None, updates if provided. + Returns `{gist_id, gist_url}`. -```python -{1: "OVERLOAD", 2: "OVERHEAT", 3: "SENSOR_FAIL", 4: "JAM", 7: "E_STOP"} -``` +GitHub Gist API: + POST https://api.github.com/gists — create + PATCH https://api.github.com/gists/{gist_id} — update -## Risks +### 3. CMMS Routes (`cmms.py`) -- plc-modbus in mock mode returns VFDStatus=0 at rest — poller sees IDLE immediately (expected) -- Multiple lines share one plc-modbus service currently — same io_data, different `line_id` rows +| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/mes/cmms/sync/{work_order_id}` | Push WO to CMMS Gist; saves cmms_ref back to DB | +| GET | `/api/mes/cmms/sync/{work_order_id}` | Return sync status (cmms_ref, cmms_synced_at) | +| POST | `/api/mes/cmms/ingest` | Import CMMS work order → create/update in MES | -## Rollback +Ingest body resolves product by SKU, line by name. Creates WO as PENDING +with `cmms_ref` already populated (marks it as CMMS-originated). -```bash -git checkout feat/mes-week1-db-schema -``` +--- -## Verification Steps +## Risks -```bash -# Unit tests (no docker needed) -cd services/mes && pytest tests/test_machine_states.py -v +- `cmms_enabled=False` must short-circuit cleanly in both sync and ingest paths. +- GitHub token is a secret — never logged or returned in API responses. +- `cmms_ref` uniqueness: if the same WO is synced twice, update the Gist, don't create a second. -# Integration: start stack, check state endpoint -docker compose up mes-db mes plc-modbus -d -curl localhost:8300/api/mes/lines -curl localhost:8300/api/mes/lines//state +## Rollback -# Inject a fault and verify DB transition -curl -X POST localhost:8001/api/plc/mock/fault -H "Content-Type: application/json" -d '{"fault_type":"jam"}' -sleep 8 -curl localhost:8300/api/mes/lines//state # should show DOWN / JAM -``` +Delete new files, remove import from main.py. +Run migration downgrade: `ALTER TABLE work_orders DROP COLUMN cmms_ref; DROP COLUMN cmms_synced_at;` -## Note on Active Focus Window +## Verification -Explicitly authorized by Mike (2026-04-15 session). +1. `pytest tests/test_cmms.py -v` — all new tests pass +2. `pytest tests/ -v` — full suite (95 + new) passes, zero regressions +3. With cmms_enabled=False: sync endpoint returns 200 with mock gist_id +4. Ingest: POST with valid line/product → WO appears in GET /api/mes/work-orders diff --git a/infra/ansible/README.md b/infra/ansible/README.md index 5ac55f9..e605bd9 100644 --- a/infra/ansible/README.md +++ b/infra/ansible/README.md @@ -44,6 +44,39 @@ ansible-playbook -i inventory.ini playbook.yml --check | Shell config | Universal env vars + aliases in `.zshrc` (preserves existing content) | | tmux | Deploys shared `~/.tmux.conf`, auto-attaches on SSH login | | Remote Login | Enables macOS SSH access | +| **SSH config (Tailscale-first)** | Deploys canonical `~/.ssh/config` pointing every alias (alpha/bravo/charlie/plc/travel/prod/pi) at Tailscale IPs. Source: `templates/ssh_config.j2` | +| **Claude Code permissions** | Merges canonical allow-list (ssh/scp/rsync/tailscale/nc/ping/dig/host) into every node's `~/.claude/settings.json` so agents never prompt for cluster-internal commands. Source: `files/merge_claude_permissions.py` | + +### Canonical SSH Aliases (after sync) + +```bash +ssh alpha # 100.107.140.12 (factorylm@) +ssh bravo # 100.86.236.11 (bravonode@) # Tailscale default +ssh bravo-lan # 192.168.1.11 # same-subnet fallback +ssh charlie # 100.70.49.126 (charlienode@) +ssh plc # 100.72.2.99 (hharp@) +ssh travel # 100.83.251.23 (hharp@) +ssh prod # 100.68.120.99 (root@) # VPS via Tailscale +ssh prod-public # 165.245.138.91 # DigitalOcean fallback +ssh pi # 100.66.216.6 (pi@) +``` + +The first time each node receives the template, Ansible writes a timestamped backup alongside (e.g. `~/.ssh/config.3854.2026-04-24@…~`) so a pre-existing hand-edit is never lost. + +### Canonical Claude Code Permissions + +Appended to every node's `~/.claude/settings.json` → `permissions.allow`: + +``` +Bash(ssh *) Bash(scp *) Bash(rsync *) +Bash(tailscale *) Bash(/opt/homebrew/bin/tailscale *) +Bash(nc -z *) Bash(ping -c* *) +Bash(dig *) Bash(host *) +``` + +The merge is additive: only adds missing entries, preserves existing hooks / statusLine / model settings. A version marker at `~/.claude/.permissions-merged-v` short-circuits re-runs at the same version. + +To roll out a new canonical entry: edit `files/merge_claude_permissions.py`, raise `CANONICAL_VERSION`, commit, re-run the playbook. ## Adding New Packages diff --git a/infra/ansible/files/merge_claude_permissions.py b/infra/ansible/files/merge_claude_permissions.py new file mode 100644 index 0000000..228e976 --- /dev/null +++ b/infra/ansible/files/merge_claude_permissions.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +"""Merge cluster-canonical Claude Code permission entries into ~/.claude/settings.json. + +Called by the factorylm Ansible playbook. Idempotent: safe to re-run. + +- Preserves all existing keys (statusLine, effortLevel, hooks, etc.). +- Adds any canonical entries missing from permissions.allow. +- Leaves user-added local entries untouched. +- Writes a version marker to ~/.claude/.permissions-merged-v so Ansible's + `creates:` idempotency skips re-runs at the same version. + +Bump CANONICAL_VERSION whenever CANONICAL_ALLOW changes so the task re-fires. +""" +from __future__ import annotations + +import json +import pathlib +import sys + +CANONICAL_VERSION = 1 + +CANONICAL_ALLOW = [ + "Bash(ssh *)", + "Bash(scp *)", + "Bash(rsync *)", + "Bash(tailscale *)", + "Bash(/opt/homebrew/bin/tailscale *)", + "Bash(nc -z *)", + "Bash(ping -c* *)", + "Bash(dig *)", + "Bash(host *)", +] + + +def main() -> int: + claude_dir = pathlib.Path.home() / ".claude" + claude_dir.mkdir(parents=True, exist_ok=True) + settings_path = claude_dir / "settings.json" + marker = claude_dir / f".permissions-merged-v{CANONICAL_VERSION}" + + if marker.exists(): + print(f"marker {marker.name} present — nothing to do") + return 0 + + if settings_path.exists(): + try: + data = json.loads(settings_path.read_text()) + except json.JSONDecodeError as exc: + print(f"ERROR: {settings_path} is not valid JSON: {exc}", file=sys.stderr) + return 2 + else: + data = {} + + perms = data.setdefault("permissions", {}) + allow = perms.setdefault("allow", []) + if not isinstance(allow, list): + print(f"ERROR: permissions.allow is not a list: {type(allow).__name__}", file=sys.stderr) + return 2 + + added = 0 + for entry in CANONICAL_ALLOW: + if entry not in allow: + allow.append(entry) + added += 1 + + perms.setdefault("deny", []) + perms.setdefault("defaultMode", "auto") + + settings_path.write_text(json.dumps(data, indent=2) + "\n") + marker.touch() + + print(f"merged: {added} new of {len(CANONICAL_ALLOW)} canonical entries (v{CANONICAL_VERSION})") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/infra/ansible/inventory.ini b/infra/ansible/inventory.ini index 7cc58ee..9e9f764 100644 --- a/infra/ansible/inventory.ini +++ b/infra/ansible/inventory.ini @@ -2,11 +2,14 @@ # Connects over Tailscale SSH. LAN IPs used for services (see CLUSTER.md). # # Usage: ansible-playbook -i inventory.ini playbook.yml +# +# Tailscale IPs refreshed 2026-04-24 — prior values (100.108.19.94, 100.82.246.52) +# belonged to retired hardware (offline 41–54 days on the tailnet). [mac_minis] -alpha ansible_host=100.108.19.94 ansible_user=alphanode -bravo ansible_host=100.86.236.11 ansible_user=bravonode -charlie ansible_host=100.82.246.52 ansible_user=charlienode +alpha ansible_host=100.107.140.12 ansible_user=factorylm +bravo ansible_host=100.86.236.11 ansible_user=bravonode +charlie ansible_host=100.70.49.126 ansible_user=charlienode [mac_minis:vars] ansible_python_interpreter=/opt/homebrew/bin/python3 diff --git a/infra/ansible/playbook.yml b/infra/ansible/playbook.yml index 3b8b78b..ab44db8 100644 --- a/infra/ansible/playbook.yml +++ b/infra/ansible/playbook.yml @@ -136,3 +136,45 @@ path: "{{ ansible_env.HOME }}/.claude" state: directory mode: "0755" + + # ===================================================================== + # Cluster SSH + Claude Code permission sync (added 2026-04-24) + # Makes every Mac mini Tailscale-first and lets Claude Code run ssh / + # tailscale / rsync without a permission prompt on each session. + # ===================================================================== + + # --- Ensure ~/.ssh exists with correct perms --- + - name: Ensure ~/.ssh directory exists + tags: [ssh_claude_sync, ssh] + ansible.builtin.file: + path: "{{ ansible_env.HOME }}/.ssh" + state: directory + mode: "0700" + + # --- Deploy cluster-canonical SSH config (Tailscale-first) --- + - name: Deploy cluster-canonical SSH config + tags: [ssh_claude_sync, ssh] + ansible.builtin.template: + src: templates/ssh_config.j2 + dest: "{{ ansible_env.HOME }}/.ssh/config" + mode: "0600" + backup: true + + # --- Drop the Claude Code permission-merger helper --- + - name: Install Claude Code permission-merger helper + tags: [ssh_claude_sync, claude_perms] + ansible.builtin.copy: + src: files/merge_claude_permissions.py + dest: "{{ ansible_env.HOME }}/.claude/merge_claude_permissions.py" + mode: "0755" + + # --- Merge canonical permission entries into ~/.claude/settings.json --- + # Idempotent: marker file at ~/.claude/.permissions-merged-v short-circuits. + - name: Merge canonical Claude Code permissions (ssh/tailscale/rsync/etc.) + tags: [ssh_claude_sync, claude_perms] + ansible.builtin.command: + cmd: "/usr/bin/env python3 {{ ansible_env.HOME }}/.claude/merge_claude_permissions.py" + args: + creates: "{{ ansible_env.HOME }}/.claude/.permissions-merged-v1" + register: claude_perms_merge + changed_when: "'merged:' in (claude_perms_merge.stdout | default(''))" diff --git a/infra/ansible/templates/ssh_config.j2 b/infra/ansible/templates/ssh_config.j2 new file mode 100644 index 0000000..360a909 --- /dev/null +++ b/infra/ansible/templates/ssh_config.j2 @@ -0,0 +1,78 @@ +# ========================================================================= +# MANAGED BY FACTORYLM ANSIBLE — DO NOT EDIT BY HAND +# Source: infra/ansible/templates/ssh_config.j2 +# Roster: /Users/Shared/cluster/betterclaw/memory/automated-agents-roster.md +# Edits land here via: ansible-playbook -i inventory.ini playbook.yml +# ========================================================================= + +Include {{ ansible_env.HOME }}/.colima/ssh_config + +# Global defaults — keep connections healthy, reuse via ControlMaster. +Host * + ServerAliveInterval 30 + ServerAliveCountMax 3 + TCPKeepAlive yes + ControlMaster auto + ControlPath ~/.ssh/cm-%r@%h:%p + ControlPersist 10m + +# ------------------------------------------------------------------------- +# ALPHA — orchestrator Mac Mini (Tailscale) +Host alpha + HostName 100.107.140.12 + User factorylm + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +# ------------------------------------------------------------------------- +# BRAVO — compute Mac Mini. Default = Tailscale; use bravo-lan for same-subnet. +Host bravo + HostName 100.86.236.11 + User bravonode + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +Host bravo-lan + HostName 192.168.1.11 + User bravonode + +Host bravo-tailscale + HostName 100.86.236.11 + User bravonode + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +# ------------------------------------------------------------------------- +# CHARLIE — KB Mac Mini (Tailscale — useful when running from ALPHA/BRAVO) +Host charlie + HostName 100.70.49.126 + User charlienode + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +# ------------------------------------------------------------------------- +# PLC laptop (Tailscale, Windows) +Host plc + HostName 100.72.2.99 + User hharp + +# ------------------------------------------------------------------------- +# TRAVEL laptop (Tailscale, Windows) +Host travel + HostName 100.83.251.23 + User hharp + +# ------------------------------------------------------------------------- +# FactoryLM production VPS — Tailscale-first, public-IP fallback. +Host prod factorylm-prod + HostName 100.68.120.99 + User root + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +Host prod-public factorylm-prod-public + HostName 165.245.138.91 + User root + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 + +# ------------------------------------------------------------------------- +# PI edge node (currently offline; entry kept for when it wakes) +Host pi + HostName 100.66.216.6 + User pi + IdentityFile {{ ansible_env.HOME }}/.ssh/id_ed25519 diff --git a/scripts/health-check.sh b/scripts/health-check.sh new file mode 100755 index 0000000..3581b0d --- /dev/null +++ b/scripts/health-check.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Minimal health check. Logs to /tmp/factorylm-health.log via plist redirect. +ts=$(date -u +%Y-%m-%dT%H:%M:%SZ) +load=$(uptime | awk -F'load averages:' '{print $2}' | xargs) +disk=$(df -h /System/Volumes/Data | awk 'NR==2 {print $5}') +qdrant=$(curl -sm 3 -o /dev/null -w "%{http_code}" http://localhost:8000/healthz 2>/dev/null) +ollama_bravo=$(curl -sm 3 -o /dev/null -w "%{http_code}" http://192.168.1.11:11434/api/version 2>/dev/null) +echo "$ts load=$load disk=$disk qdrant=$qdrant ollama_bravo=$ollama_bravo" diff --git a/services/brain/config.py b/services/brain/config.py index 5fd1d13..6b10f6a 100644 --- a/services/brain/config.py +++ b/services/brain/config.py @@ -1,7 +1,17 @@ """Mem0 configuration for Open Brain. -Wires up pgvector (Neon), Gemini embeddings, and Groq LLM (for fact extraction). +Wires up pgvector (Neon), Ollama embeddings, and Groq LLM (for fact extraction). Gemini free-tier JSON mode has very low rate limits, so we use Groq for LLM calls. + +Embeddings: Ollama nomic-embed-text (768d) running locally on CHARLIE. +Switched from Gemini gemini-embedding-001 on 2026-05-23 because the Gemini key +kept expiring. Both produce 768d vectors so the pgvector schema is unchanged, +but the vector spaces are different — the 5,493 memories embedded before this +switch will return lower-relevance results on cross-space queries until they +are re-embedded with `tools/brain_backfill.py`. + +Aligns with MIRA's knowledge_entries KB (also nomic-embed-text) for future +vector-space unification. """ from __future__ import annotations @@ -19,14 +29,14 @@ def get_memory() -> Memory: "config": { "connection_string": os.environ["NEON_DATABASE_URL"], "collection_name": "brain_memories", - "embedding_model_dims": 768, # Gemini gemini-embedding-001 (output_dimensionality=768) + "embedding_model_dims": 768, # nomic-embed-text (also 768d, matches prior Gemini schema) }, }, "embedder": { - "provider": "gemini", + "provider": "ollama", "config": { - "model": "models/gemini-embedding-001", - "api_key": os.environ.get("GEMINI_API_KEY"), + "model": "nomic-embed-text", + "ollama_base_url": os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"), }, }, "llm": { diff --git a/services/mcp/brain_server.py b/services/mcp/brain_server.py index 84008b2..a200289 100644 --- a/services/mcp/brain_server.py +++ b/services/mcp/brain_server.py @@ -25,8 +25,9 @@ _memory = None _setup_error = None -# Check env vars on import so we can give a helpful message -_REQUIRED_VARS = ["NEON_DATABASE_URL", "GEMINI_API_KEY", "GROQ_API_KEY"] +# Check env vars on import so we can give a helpful message. +# Embeddings now use local Ollama (no API key required); see services/brain/config.py. +_REQUIRED_VARS = ["NEON_DATABASE_URL", "GROQ_API_KEY"] _missing = [v for v in _REQUIRED_VARS if not os.environ.get(v)] if _missing: _setup_error = ( @@ -34,8 +35,8 @@ "To set up: install Doppler CLI (doppler.com), then run the brain MCP server via " ".mcp.json which uses Doppler to inject secrets. " "Needed: NEON_DATABASE_URL (Doppler openclaw/dev), " - "GEMINI_API_KEY (Doppler factorylm/dev), " "GROQ_API_KEY (Doppler openclaw/dev). " + "Embeddings via local Ollama (nomic-embed-text on http://localhost:11434) — no API key. " "See CLAUDE.md 'Open Brain — Startup Protocol' for details." ) logger.warning("Brain MCP: %s", _setup_error) @@ -52,7 +53,8 @@ def _get_memory(): except Exception as e: raise RuntimeError( f"Brain failed to initialize: {e}. " - "Check that NEON_DATABASE_URL, GEMINI_API_KEY, GROQ_API_KEY are set correctly." + "Check that NEON_DATABASE_URL and GROQ_API_KEY are set, and that " + "Ollama is running locally with nomic-embed-text pulled." ) from e return _memory @@ -171,16 +173,9 @@ def brain_ingest_file(file_path: str, source: str = "repo", tags: list[str] = [] except RuntimeError as e: return {"error": str(e)} -<<<<<<< HEAD from kb.chunker import chunk_file chunks = chunk_file(file_path, extra_metadata={"tags": tags}) -======= - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - # Chunk large files by paragraphs - chunks = [c.strip() for c in content.split("\n\n") if c.strip() and len(c.strip()) > 50] ->>>>>>> 433ffad (feat(brain): graceful fallback + Open Brain startup protocol) if not chunks: with open(file_path, "r", encoding="utf-8") as f: @@ -256,4 +251,13 @@ def brain_stats() -> dict[str, Any]: # --------------------------------------------------------------------------- if __name__ == "__main__": - app.run() + # Honor MCP_TRANSPORT/MCP_HOST/MCP_PORT from the launchd plist. + # Without this, app.run() defaults to stdio and exits immediately + # when launched as a long-running service. + transport = os.environ.get("MCP_TRANSPORT", "stdio") + if transport in ("sse", "streamable-http"): + host = os.environ.get("MCP_HOST", "127.0.0.1") + port = int(os.environ.get("MCP_PORT", "8000")) + app.settings.host = host + app.settings.port = port + app.run(transport=transport) diff --git a/services/mes/alembic/versions/0002_add_cmms_ref.py b/services/mes/alembic/versions/0002_add_cmms_ref.py new file mode 100644 index 0000000..80ccb02 --- /dev/null +++ b/services/mes/alembic/versions/0002_add_cmms_ref.py @@ -0,0 +1,38 @@ +"""Add cmms_ref and cmms_synced_at to work_orders. + +Revision ID: 0002 +Revises: 0001 +Create Date: 2026-04-16 + +Adds two nullable columns to work_orders: + cmms_ref TEXT — GitHub Gist ID once synced to CMMS (NULL = not yet pushed) + cmms_synced_at TIMESTAMPTZ — timestamp of last successful CMMS push + +These are both nullable so existing rows are unaffected. +No seed data required. +""" + +from typing import Sequence, Union + +from alembic import op + +revision: str = "0002" +down_revision: Union[str, None] = "0001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute(""" + ALTER TABLE work_orders + ADD COLUMN IF NOT EXISTS cmms_ref TEXT, + ADD COLUMN IF NOT EXISTS cmms_synced_at TIMESTAMPTZ; + """) + + +def downgrade() -> None: + op.execute(""" + ALTER TABLE work_orders + DROP COLUMN IF EXISTS cmms_ref, + DROP COLUMN IF EXISTS cmms_synced_at; + """) diff --git a/services/mes/backend/config.py b/services/mes/backend/config.py index f76f6b6..2e56ccd 100644 --- a/services/mes/backend/config.py +++ b/services/mes/backend/config.py @@ -24,8 +24,15 @@ class Settings(BaseSettings): # Polling interval in seconds (default 5, set lower in tests) plc_poll_interval_sec: int = 5 - # Set True to skip poller startup (useful in unit tests) + # OEE calculator tick interval in seconds (default 60) + oee_tick_sec: int = 60 + + # Set True to skip background task startup (useful in unit tests) plc_use_mock: bool = False + # CMMS sync via GitHub Gist — disabled by default (set token to enable) + cmms_enabled: bool = False + cmms_github_token: str = "" # GitHub PAT with gist scope (via Doppler) + settings = Settings() diff --git a/services/mes/backend/main.py b/services/mes/backend/main.py index 477dbfc..e23d5cf 100644 --- a/services/mes/backend/main.py +++ b/services/mes/backend/main.py @@ -1,12 +1,23 @@ """FactoryLM MES API — FastAPI entry point. Lifespan: - startup → seed state cache, launch background state poller - shutdown → signal poller to stop cleanly + startup → launch state poller + OEE calculator background tasks + shutdown → stop both tasks cleanly Routes (cumulative by week): Week 1: /api/health Week 2: /api/mes/lines, /api/mes/lines/{id}/state + Week 3: /api/mes/lines/{id}/oee, /api/mes/lines/{id}/oee/history + /api/mes/oee/summary, /api/mes/kpis + Week 4: /api/mes/products, /api/mes/products (POST/GET) + /api/mes/work-orders (POST/GET), /api/mes/work-orders/{id} (GET) + /api/mes/work-orders/{id}/status (PATCH) + Schedule-aware TEEP via schedules table + Week 5: /api/mes/downtime-reasons + /api/mes/lines/{id}/downtime (GET/POST) + NLP classifier: free-text → reason_code + Week 6: /api/mes/cmms/sync/{id} (POST/GET) — CMMS Gist push + status + /api/mes/cmms/ingest (POST) — import CMMS WO into MES """ import asyncio @@ -17,9 +28,13 @@ from fastapi.middleware.cors import CORSMiddleware from backend.config import settings +from backend.routes.cmms import router as cmms_router +from backend.routes.downtime import router as downtime_router from backend.routes.health import router as health_router from backend.routes.lines import router as lines_router -from backend.services import state_poller +from backend.routes.oee import router as oee_router +from backend.routes.work_orders import router as work_orders_router +from backend.services import oee_calculator, state_poller logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -30,25 +45,34 @@ async def lifespan(app: FastAPI): db_host = settings.database_url.split("@")[-1] logger.info("MES service starting — DB: %s PLC: %s", db_host, settings.plc_modbus_url) - poller_task = None + poller_task = oee_task = None + if not settings.plc_use_mock: poller_task = asyncio.create_task( state_poller.run(poll_interval_sec=settings.plc_poll_interval_sec), name="state_poller", ) - logger.info("State poller started (interval=%ds)", settings.plc_poll_interval_sec) + oee_task = asyncio.create_task( + oee_calculator.run(tick_sec=settings.oee_tick_sec), + name="oee_calculator", + ) + logger.info( + "Background tasks started — poller=%ds oee_tick=%ds", + settings.plc_poll_interval_sec, settings.oee_tick_sec, + ) else: - logger.info("PLC mock mode — state poller disabled") + logger.info("PLC mock mode — background tasks disabled") yield logger.info("MES service shutting down") - if poller_task: - state_poller.stop() + state_poller.stop() + oee_calculator.stop() + for task in [t for t in [poller_task, oee_task] if t]: try: - await asyncio.wait_for(poller_task, timeout=8.0) - except asyncio.TimeoutError: - poller_task.cancel() + await asyncio.wait_for(task, timeout=8.0) + except (asyncio.TimeoutError, asyncio.CancelledError): + task.cancel() app = FastAPI( @@ -65,8 +89,12 @@ async def lifespan(app: FastAPI): allow_headers=["*"], ) -app.include_router(health_router, prefix=settings.api_prefix) -app.include_router(lines_router, prefix=settings.api_prefix) +app.include_router(health_router, prefix=settings.api_prefix) +app.include_router(lines_router, prefix=settings.api_prefix) +app.include_router(oee_router, prefix=settings.api_prefix) +app.include_router(work_orders_router, prefix=settings.api_prefix) +app.include_router(downtime_router, prefix=settings.api_prefix) +app.include_router(cmms_router, prefix=settings.api_prefix) if __name__ == "__main__": diff --git a/services/mes/backend/models/db_models.py b/services/mes/backend/models/db_models.py index 94391d4..8a2811f 100644 --- a/services/mes/backend/models/db_models.py +++ b/services/mes/backend/models/db_models.py @@ -121,6 +121,8 @@ class WorkOrder(Base): actual_start = Column(DateTime(timezone=True)) actual_end = Column(DateTime(timezone=True)) notes = Column(Text) + cmms_ref = Column(Text) # GitHub Gist ID once synced + cmms_synced_at = Column(DateTime(timezone=True)) # timestamp of last CMMS push created_at = Column(DateTime(timezone=True), server_default=text("NOW()")) updated_at = Column(DateTime(timezone=True), server_default=text("NOW()"), onupdate=datetime.utcnow) diff --git a/services/mes/backend/routes/cmms.py b/services/mes/backend/routes/cmms.py new file mode 100644 index 0000000..3083a0a --- /dev/null +++ b/services/mes/backend/routes/cmms.py @@ -0,0 +1,183 @@ +"""CMMS sync routes — bidirectional work order sync via GitHub Gist. + +Week 6 endpoints: + POST /api/mes/cmms/sync/{work_order_id} — push WO to CMMS Gist + GET /api/mes/cmms/sync/{work_order_id} — sync status (cmms_ref, ts) + POST /api/mes/cmms/ingest — import CMMS work order → MES + +Controlled by settings.cmms_enabled: + False (default) — push returns a synthetic Gist reference, no HTTP calls. + True — calls GitHub Gist API with settings.cmms_github_token. +""" + +import logging +from datetime import datetime, timezone +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from sqlalchemy.orm import Session + +from backend.database import get_db +from backend.models.db_models import Line, Product, WorkOrder, WorkOrderStatus +from backend.models.mes_models import WorkOrderResponse +from backend.routes.work_orders import _to_response +from backend.services import cmms_client + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/mes/cmms", tags=["cmms"]) + + +# ── Response models ─────────────────────────────────────────────────────────── + +class CMMSSyncStatus(BaseModel): + work_order_id: str + order_number: str + cmms_ref: Optional[str] # Gist ID, None if never synced + cmms_gist_url: Optional[str] + cmms_synced_at: Optional[datetime] + synced: bool + + +class CMMSSyncResult(BaseModel): + work_order_id: str + order_number: str + gist_id: str + gist_url: str + cmms_synced_at: datetime + + +class CMMSIngestRequest(BaseModel): + """Inbound CMMS work order — resolves product by SKU, line by name.""" + order_number: str + product_sku: str + line_name: str + target_qty: int = Field(gt=0) + notes: Optional[str] = None + cmms_ref: Optional[str] = None # Gist ID if already exists in CMMS + + +# ── Endpoints ───────────────────────────────────────────────────────────────── + +@router.post("/sync/{work_order_id}", response_model=CMMSSyncResult) +def sync_work_order_to_cmms(work_order_id: str, db: Session = Depends(get_db)): + """Push a MES work order to CMMS (creates or updates Gist). + + Saves the returned Gist ID back to work_orders.cmms_ref. + Safe to call multiple times — updates the existing Gist on repeat. + """ + wo = db.query(WorkOrder).filter(WorkOrder.id == work_order_id).first() + if not wo: + raise HTTPException(status_code=404, detail=f"Work order '{work_order_id}' not found") + + line = db.query(Line).filter(Line.id == wo.line_id).first() + product = db.query(Product).filter(Product.id == wo.product_id).first() + + metadata = cmms_client.format_work_order( + order_number=wo.order_number, + status=wo.status.value, + target_qty=wo.target_qty, + good_qty=wo.good_qty, + line_name=line.name if line else "Unknown", + line_isa95_path=line.isa95_path if line else "", + product_sku=product.sku if product else "", + product_name=product.name if product else "Unknown", + notes=wo.notes, + actual_start=wo.actual_start, + actual_end=wo.actual_end, + cmms_external_id=wo.cmms_ref, + ) + + try: + result = cmms_client.push_work_order(metadata, gist_id=wo.cmms_ref) + except Exception as exc: + logger.exception("CMMS push failed for WO %s", wo.order_number) + raise HTTPException(status_code=502, detail=f"CMMS push failed: {exc}") from exc + + # Persist Gist reference + now = datetime.now(timezone.utc) + wo.cmms_ref = result["gist_id"] + wo.cmms_synced_at = now + db.commit() + + logger.info("CMMS sync: %s → %s", wo.order_number, result["gist_url"]) + return CMMSSyncResult( + work_order_id=str(wo.id), + order_number=wo.order_number, + gist_id=result["gist_id"], + gist_url=result["gist_url"], + cmms_synced_at=now, + ) + + +@router.get("/sync/{work_order_id}", response_model=CMMSSyncStatus) +def get_cmms_sync_status(work_order_id: str, db: Session = Depends(get_db)): + """Return the CMMS sync status for a work order.""" + wo = db.query(WorkOrder).filter(WorkOrder.id == work_order_id).first() + if not wo: + raise HTTPException(status_code=404, detail=f"Work order '{work_order_id}' not found") + + gist_url = ( + f"https://gist.github.com/{wo.cmms_ref}" if wo.cmms_ref else None + ) + return CMMSSyncStatus( + work_order_id=str(wo.id), + order_number=wo.order_number, + cmms_ref=wo.cmms_ref, + cmms_gist_url=gist_url, + cmms_synced_at=wo.cmms_synced_at, + synced=(wo.cmms_ref is not None), + ) + + +@router.post("/ingest", response_model=WorkOrderResponse, status_code=201) +def ingest_cmms_work_order(body: CMMSIngestRequest, db: Session = Depends(get_db)): + """Import a CMMS work order into the MES database. + + Resolves product by SKU and line by name. + If order_number already exists, returns the existing WO (idempotent). + Sets cmms_ref to mark this WO as CMMS-originated. + """ + # Idempotency — return existing WO if order_number already in DB + existing = db.query(WorkOrder).filter( + WorkOrder.order_number == body.order_number + ).first() + if existing: + logger.info("CMMS ingest: %s already exists, returning existing", body.order_number) + return _to_response(existing) + + # Resolve line by name + line = db.query(Line).filter(Line.name == body.line_name).first() + if not line: + raise HTTPException( + status_code=404, + detail=f"Line '{body.line_name}' not found. Available: Conveyor-1, Sorting-1", + ) + + # Resolve product by SKU + product = db.query(Product).filter(Product.sku == body.product_sku).first() + if not product: + raise HTTPException( + status_code=404, + detail=f"Product SKU '{body.product_sku}' not found. Create it first via POST /api/mes/products", + ) + + wo = WorkOrder( + order_number=body.order_number, + product_id=product.id, + line_id=line.id, + target_qty=body.target_qty, + notes=body.notes, + status=WorkOrderStatus.PENDING, + cmms_ref=body.cmms_ref, + cmms_synced_at=datetime.now(timezone.utc) if body.cmms_ref else None, + ) + db.add(wo) + db.commit() + db.refresh(wo) + logger.info( + "CMMS ingest: created WO %s (line=%s, sku=%s, cmms_ref=%s)", + wo.order_number, line.name, product.sku, wo.cmms_ref, + ) + return _to_response(wo) diff --git a/services/mes/backend/routes/downtime.py b/services/mes/backend/routes/downtime.py new file mode 100644 index 0000000..e0a0f85 --- /dev/null +++ b/services/mes/backend/routes/downtime.py @@ -0,0 +1,236 @@ +"""Downtime tracking routes — reason lookup, history, manual entry. + +Week 5 endpoints: + GET /api/mes/downtime-reasons — list all 14 reason codes + GET /api/mes/lines/{id}/downtime — downtime events for one line + POST /api/mes/lines/{id}/downtime — attach reason to open DOWN event + +POST accepts two modes: + Direct: { "reason_code": "JAM", "entered_by": "OPERATOR" } + NLP: { "description": "line jammed", "entered_by": "MIRA_AI" } + +If description is given the NLP classifier maps it to a reason_code. +If the classifier returns UNKNOWN a confidence="low" warning is included. +""" + +import logging +from datetime import datetime, timedelta, timezone +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel +from sqlalchemy import text +from sqlalchemy.orm import Session + +from backend.database import get_db +from backend.models.db_models import ( + DowntimeReason, + EnteredBy, + Line, + MachineState, + MachineStateEnum, +) +from backend.services.downtime_classifier import classify_reason + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/mes", tags=["downtime"]) + + +# ── Response models ─────────────────────────────────────────────────────────── + +class DowntimeReasonResponse(BaseModel): + code: str + description: str + category: str # PLANNED / UNPLANNED / EXTERNAL + + +class DowntimeEventResponse(BaseModel): + id: str + line_id: str + state: str + reason_code: Optional[str] + reason_desc: Optional[str] # joined from downtime_reasons + category: Optional[str] # PLANNED / UNPLANNED / EXTERNAL + entered_by: str + notes: Optional[str] + started_at: datetime + ended_at: Optional[datetime] + duration_min: Optional[int] # None if event is still open + + +class DowntimeEntryRequest(BaseModel): + reason_code: Optional[str] = None # direct code entry + description: Optional[str] = None # free-text → NLP classifier + entered_by: str = "OPERATOR" # OPERATOR | MIRA_AI + notes: Optional[str] = None + + +class DowntimeEntryResponse(BaseModel): + event: DowntimeEventResponse + classified: Optional[bool] = None # True if NLP was used + confidence: Optional[str] = None # "high" | "low" (NLP only) + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +_DOWN_STATES = (MachineStateEnum.DOWN.value, MachineStateEnum.CHANGEOVER.value) + + +def _to_event(row: MachineState, reason: Optional[DowntimeReason]) -> DowntimeEventResponse: + now = datetime.now(timezone.utc) + ended = row.ended_at + duration = None + if row.started_at: + end_ts = ended if ended else now + # Ensure both are timezone-aware before subtracting + start_ts = row.started_at + if start_ts.tzinfo is None: + start_ts = start_ts.replace(tzinfo=timezone.utc) + if end_ts.tzinfo is None: + end_ts = end_ts.replace(tzinfo=timezone.utc) + duration = max(0, int((end_ts - start_ts).total_seconds() / 60)) + + return DowntimeEventResponse( + id=str(row.id), + line_id=str(row.line_id), + state=row.state.value, + reason_code=row.reason_code, + reason_desc=reason.description if reason else None, + category=reason.category.value if reason else None, + entered_by=row.entered_by.value if row.entered_by else EnteredBy.PLC.value, + notes=row.notes, + started_at=row.started_at, + ended_at=row.ended_at, + duration_min=duration, + ) + + +# ── Endpoints ───────────────────────────────────────────────────────────────── + +@router.get("/downtime-reasons", response_model=list[DowntimeReasonResponse]) +def list_downtime_reasons(db: Session = Depends(get_db)): + """All configured downtime reason codes (seeded 14 standard codes).""" + reasons = db.query(DowntimeReason).order_by(DowntimeReason.category, DowntimeReason.code).all() + return [ + DowntimeReasonResponse( + code=r.code, + description=r.description, + category=r.category.value, + ) + for r in reasons + ] + + +@router.get("/lines/{line_id}/downtime", response_model=list[DowntimeEventResponse]) +def get_line_downtime( + line_id: str, + hours: int = Query(default=8, ge=1, le=72), + db: Session = Depends(get_db), +): + """Downtime events (DOWN + CHANGEOVER) for a line over the last N hours.""" + line = db.query(Line).filter(Line.id == line_id).first() + if not line: + raise HTTPException(status_code=404, detail=f"Line {line_id} not found") + + since = datetime.now(timezone.utc) - timedelta(hours=hours) + rows = ( + db.query(MachineState) + .filter( + MachineState.line_id == line_id, + MachineState.state.in_([MachineStateEnum.DOWN, MachineStateEnum.CHANGEOVER]), + MachineState.started_at >= since, + ) + .order_by(MachineState.started_at.desc()) + .all() + ) + + result = [] + for row in rows: + reason = ( + db.query(DowntimeReason).filter(DowntimeReason.code == row.reason_code).first() + if row.reason_code else None + ) + result.append(_to_event(row, reason)) + return result + + +@router.post("/lines/{line_id}/downtime", response_model=DowntimeEntryResponse) +def enter_downtime_reason( + line_id: str, + body: DowntimeEntryRequest, + db: Session = Depends(get_db), +): + """Attach a downtime reason to the current open DOWN or CHANGEOVER event. + + Accepts either a direct reason_code or a free-text description (NLP classified). + Returns 409 if the line has no open downtime event right now. + """ + line = db.query(Line).filter(Line.id == line_id).first() + if not line: + raise HTTPException(status_code=404, detail=f"Line {line_id} not found") + + # Validate entered_by + try: + entered_by = EnteredBy(body.entered_by.upper()) + except ValueError: + raise HTTPException(status_code=422, detail=f"Unknown entered_by '{body.entered_by}'") + + # Find the most recent open DOWN/CHANGEOVER event + open_event = ( + db.query(MachineState) + .filter( + MachineState.line_id == line_id, + MachineState.state.in_([MachineStateEnum.DOWN, MachineStateEnum.CHANGEOVER]), + MachineState.ended_at.is_(None), + ) + .order_by(MachineState.started_at.desc()) + .first() + ) + if not open_event: + raise HTTPException( + status_code=409, + detail=f"Line {line.name} has no open DOWN or CHANGEOVER event. " + "Start a downtime event first (state must be DOWN or CHANGEOVER).", + ) + + # Resolve reason_code — direct or NLP + classified = False + confidence = None + + if body.reason_code: + reason_code = body.reason_code.upper() + # Validate code exists + if not db.query(DowntimeReason).filter(DowntimeReason.code == reason_code).first(): + raise HTTPException(status_code=422, detail=f"Unknown reason code '{reason_code}'") + elif body.description: + reason_code, confidence = classify_reason(body.description) + classified = True + logger.info( + "NLP classified '%s' → %s (confidence=%s) for line %s", + body.description, reason_code, confidence, line.name, + ) + else: + raise HTTPException( + status_code=422, + detail="Provide either reason_code or description.", + ) + + # Update the open event + open_event.reason_code = reason_code + open_event.entered_by = entered_by + open_event.notes = body.notes + db.commit() + db.refresh(open_event) + + reason_obj = db.query(DowntimeReason).filter(DowntimeReason.code == reason_code).first() + logger.info( + "Downtime reason set: line=%s event=%s code=%s by=%s", + line.name, open_event.id, reason_code, entered_by.value, + ) + + return DowntimeEntryResponse( + event=_to_event(open_event, reason_obj), + classified=classified if classified else None, + confidence=confidence, + ) diff --git a/services/mes/backend/routes/oee.py b/services/mes/backend/routes/oee.py new file mode 100644 index 0000000..75d882e --- /dev/null +++ b/services/mes/backend/routes/oee.py @@ -0,0 +1,225 @@ +"""OEE routes — snapshots, history, fleet summary, KPIs. + +Week 3 endpoints: + GET /api/mes/lines/{id}/oee + GET /api/mes/lines/{id}/oee/history + GET /api/mes/oee/summary + GET /api/mes/kpis +""" + +import logging +from datetime import datetime, timedelta, timezone +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel +from sqlalchemy import text +from sqlalchemy.orm import Session + +from backend.database import get_db +from backend.models.db_models import Line, MachineStateEnum, OEESnapshot +from backend.services import oee_calculator, state_poller + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/mes", tags=["oee"]) + + +# ── Response models ─────────────────────────────────────────────────────────── + +class OEESnapshotResponse(BaseModel): + line_id: str + line_name: str + ts: datetime + availability: float + performance: float + quality: float + oee: float + teep: Optional[float] + run_time_sec: int + planned_time_sec: int + total_count: int + good_count: int + ideal_cycle_sec: float + low_oee_ticks: int # consecutive ticks below 60% (alert indicator) + + +class OEEHistoryItem(BaseModel): + ts: datetime + availability: float + performance: float + quality: float + oee: float + total_count: int + run_time_sec: int + + +class OEESummaryItem(BaseModel): + line_id: str + line_name: str + oee: float + availability: float + performance: float + quality: float + teep: Optional[float] + run_state: str + ts: Optional[datetime] + alert: bool # True if low_oee_ticks >= 30 + + +class KPIResponse(BaseModel): + fleet_oee: float # average OEE across all lines (latest tick) + downtime_minutes_today: int # total DOWN + OFFLINE minutes today, all lines + lines_in_alert: int # lines with OEE < 60% for 30+ min + snapshot_ts: datetime + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _latest_snapshot(db: Session, line_id: str) -> Optional[OEESnapshot]: + return ( + db.query(OEESnapshot) + .filter(OEESnapshot.line_id == line_id) + .order_by(OEESnapshot.ts.desc()) + .first() + ) + + +def _downtime_minutes_today(db: Session, line_id: str) -> int: + """Sum minutes in DOWN or OFFLINE state since midnight UTC today.""" + midnight = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + row = db.execute( + text(""" + SELECT COALESCE(SUM( + EXTRACT(EPOCH FROM ( + COALESCE(ended_at, NOW()) - GREATEST(started_at, :midnight) + )) + ), 0) / 60 + FROM machine_states + WHERE line_id = :lid + AND state IN ('DOWN', 'OFFLINE') + AND COALESCE(ended_at, NOW()) >= :midnight + """), + {"lid": line_id, "midnight": midnight}, + ).scalar() + return int(row or 0) + + +# ── Endpoints ───────────────────────────────────────────────────────────────── + +@router.get("/lines/{line_id}/oee", response_model=OEESnapshotResponse) +def get_line_oee(line_id: str, db: Session = Depends(get_db)): + """Latest OEE snapshot for one line.""" + line = db.query(Line).filter(Line.id == line_id).first() + if not line: + raise HTTPException(status_code=404, detail=f"Line {line_id} not found") + + snap = _latest_snapshot(db, line_id) + if not snap: + raise HTTPException( + status_code=404, + detail="No OEE snapshots yet — calculator ticks every 60s", + ) + + return OEESnapshotResponse( + line_id=line_id, + line_name=line.name, + ts=snap.ts, + availability=snap.availability, + performance=snap.performance, + quality=snap.quality, + oee=snap.oee, + teep=snap.teep, + run_time_sec=snap.run_time_sec, + planned_time_sec=snap.planned_time_sec, + total_count=snap.total_count, + good_count=snap.good_count, + ideal_cycle_sec=snap.ideal_cycle_sec, + low_oee_ticks=oee_calculator.get_low_oee_ticks(line_id), + ) + + +@router.get("/lines/{line_id}/oee/history", response_model=list[OEEHistoryItem]) +def get_line_oee_history( + line_id: str, + hours: int = Query(default=8, ge=1, le=72), + db: Session = Depends(get_db), +): + """OEE time-series for one line over the last N hours (default 8).""" + line = db.query(Line).filter(Line.id == line_id).first() + if not line: + raise HTTPException(status_code=404, detail=f"Line {line_id} not found") + + since = datetime.now(timezone.utc) - timedelta(hours=hours) + snaps = ( + db.query(OEESnapshot) + .filter(OEESnapshot.line_id == line_id, OEESnapshot.ts >= since) + .order_by(OEESnapshot.ts.asc()) + .all() + ) + return [ + OEEHistoryItem( + ts=s.ts, + availability=s.availability, + performance=s.performance, + quality=s.quality, + oee=s.oee, + total_count=s.total_count, + run_time_sec=s.run_time_sec, + ) + for s in snaps + ] + + +@router.get("/oee/summary", response_model=list[OEESummaryItem]) +def get_oee_summary(db: Session = Depends(get_db)): + """Fleet OEE — latest snapshot for every configured line.""" + lines = db.query(Line).order_by(Line.name).all() + result = [] + for line in lines: + line_id = str(line.id) + snap = _latest_snapshot(db, line_id) + state = state_poller.get_cached_state(line_id) + low_ticks = oee_calculator.get_low_oee_ticks(line_id) + result.append(OEESummaryItem( + line_id=line_id, + line_name=line.name, + oee=snap.oee if snap else 0.0, + availability=snap.availability if snap else 0.0, + performance=snap.performance if snap else 0.0, + quality=snap.quality if snap else 0.0, + teep=snap.teep if snap else None, + run_state=state.value if state else MachineStateEnum.OFFLINE.value, + ts=snap.ts if snap else None, + alert=(low_ticks >= oee_calculator.OEE_ALERT_TICKS), + )) + return result + + +@router.get("/kpis", response_model=KPIResponse) +def get_kpis(db: Session = Depends(get_db)): + """Aggregate KPIs: fleet OEE, total downtime minutes today, alert count.""" + lines = db.query(Line).all() + oee_values = [] + total_downtime = 0 + alert_count = 0 + + for line in lines: + line_id = str(line.id) + snap = _latest_snapshot(db, line_id) + if snap: + oee_values.append(snap.oee) + total_downtime += _downtime_minutes_today(db, line_id) + if oee_calculator.get_low_oee_ticks(line_id) >= oee_calculator.OEE_ALERT_TICKS: + alert_count += 1 + + fleet_oee = round(sum(oee_values) / len(oee_values), 4) if oee_values else 0.0 + + return KPIResponse( + fleet_oee=fleet_oee, + downtime_minutes_today=total_downtime, + lines_in_alert=alert_count, + snapshot_ts=datetime.now(timezone.utc), + ) diff --git a/services/mes/backend/routes/work_orders.py b/services/mes/backend/routes/work_orders.py new file mode 100644 index 0000000..fc35097 --- /dev/null +++ b/services/mes/backend/routes/work_orders.py @@ -0,0 +1,255 @@ +"""Work Order routes — CRUD + status transitions. + +Week 4 endpoints: + POST /api/mes/products — create a product (SKU + ideal cycle) + GET /api/mes/products — list all products + POST /api/mes/work-orders — create a work order + GET /api/mes/work-orders — list (filter ?line_id= ?status=) + GET /api/mes/work-orders/{id} — detail + PATCH /api/mes/work-orders/{id}/status — PENDING→ACTIVE→COMPLETE / CANCELLED + +Transition rules: + PENDING → ACTIVE (start the job) + ACTIVE → COMPLETE (job done) + ACTIVE → CANCELLED + PENDING → CANCELLED + +Constraint: one line can only have ONE ACTIVE work order at a time (409 on violation). +""" + +import logging +from datetime import datetime, timezone +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel, Field +from sqlalchemy.orm import Session + +from backend.database import get_db +from backend.models.db_models import Line, Product, WorkOrder, WorkOrderStatus +from backend.models.mes_models import WorkOrderCreate, WorkOrderResponse + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/mes", tags=["work-orders"]) + + +# ── Valid status transitions ─────────────────────────────────────────────────── + +_ALLOWED_TRANSITIONS = { + WorkOrderStatus.PENDING: {WorkOrderStatus.ACTIVE, WorkOrderStatus.CANCELLED}, + WorkOrderStatus.ACTIVE: {WorkOrderStatus.COMPLETE, WorkOrderStatus.CANCELLED}, + WorkOrderStatus.PAUSED: {WorkOrderStatus.ACTIVE, WorkOrderStatus.CANCELLED}, + WorkOrderStatus.COMPLETE: set(), + WorkOrderStatus.CANCELLED: set(), +} + + +# ── Request / response models ────────────────────────────────────────────────── + +class ProductCreate(BaseModel): + sku: str + name: str + ideal_cycle_sec: float = Field(gt=0, description="Seconds per part at 100% performance") + description: Optional[str] = None + + +class ProductResponse(BaseModel): + id: str + sku: str + name: str + ideal_cycle_sec: float + description: Optional[str] = None + + model_config = {"from_attributes": True} + + +class WorkOrderStatusUpdate(BaseModel): + status: str # validated against allowed transitions at runtime + + +# ── Product endpoints ────────────────────────────────────────────────────────── + +@router.post("/products", response_model=ProductResponse, status_code=201) +def create_product(body: ProductCreate, db: Session = Depends(get_db)): + """Create a new product SKU with an ideal cycle time.""" + existing = db.query(Product).filter(Product.sku == body.sku).first() + if existing: + raise HTTPException(status_code=409, detail=f"Product SKU '{body.sku}' already exists") + + product = Product( + sku=body.sku, + name=body.name, + ideal_cycle_sec=body.ideal_cycle_sec, + description=body.description, + ) + db.add(product) + db.commit() + db.refresh(product) + logger.info("Product created: %s (%.2fs/part)", product.sku, product.ideal_cycle_sec) + return ProductResponse( + id=str(product.id), + sku=product.sku, + name=product.name, + ideal_cycle_sec=product.ideal_cycle_sec, + description=product.description, + ) + + +@router.get("/products", response_model=list[ProductResponse]) +def list_products(db: Session = Depends(get_db)): + """List all products.""" + products = db.query(Product).order_by(Product.sku).all() + return [ + ProductResponse( + id=str(p.id), + sku=p.sku, + name=p.name, + ideal_cycle_sec=p.ideal_cycle_sec, + description=p.description, + ) + for p in products + ] + + +# ── Work order endpoints ─────────────────────────────────────────────────────── + +@router.post("/work-orders", response_model=WorkOrderResponse, status_code=201) +def create_work_order(body: WorkOrderCreate, db: Session = Depends(get_db)): + """Create a new work order (status = PENDING).""" + # Validate line exists + line = db.query(Line).filter(Line.id == body.line_id).first() + if not line: + raise HTTPException(status_code=404, detail=f"Line '{body.line_id}' not found") + + # Validate product exists + product = db.query(Product).filter(Product.id == body.product_id).first() + if not product: + raise HTTPException(status_code=404, detail=f"Product '{body.product_id}' not found") + + # Unique order number + if db.query(WorkOrder).filter(WorkOrder.order_number == body.order_number).first(): + raise HTTPException(status_code=409, detail=f"Order number '{body.order_number}' already exists") + + wo = WorkOrder( + order_number=body.order_number, + product_id=body.product_id, + line_id=body.line_id, + target_qty=body.target_qty, + scheduled_start=body.scheduled_start, + notes=body.notes, + status=WorkOrderStatus.PENDING, + ) + db.add(wo) + db.commit() + db.refresh(wo) + logger.info("Work order created: %s → line=%s qty=%d", wo.order_number, line.name, wo.target_qty) + return _to_response(wo) + + +@router.get("/work-orders", response_model=list[WorkOrderResponse]) +def list_work_orders( + line_id: Optional[str] = Query(default=None), + status: Optional[str] = Query(default=None), + db: Session = Depends(get_db), +): + """List work orders, optionally filtered by line_id and/or status.""" + q = db.query(WorkOrder) + if line_id: + q = q.filter(WorkOrder.line_id == line_id) + if status: + try: + s = WorkOrderStatus(status.upper()) + except ValueError: + raise HTTPException(status_code=422, detail=f"Unknown status '{status}'") + q = q.filter(WorkOrder.status == s) + return [_to_response(wo) for wo in q.order_by(WorkOrder.created_at.desc()).all()] + + +@router.get("/work-orders/{work_order_id}", response_model=WorkOrderResponse) +def get_work_order(work_order_id: str, db: Session = Depends(get_db)): + """Get a single work order by ID.""" + wo = db.query(WorkOrder).filter(WorkOrder.id == work_order_id).first() + if not wo: + raise HTTPException(status_code=404, detail=f"Work order '{work_order_id}' not found") + return _to_response(wo) + + +@router.patch("/work-orders/{work_order_id}/status", response_model=WorkOrderResponse) +def update_work_order_status( + work_order_id: str, + body: WorkOrderStatusUpdate, + db: Session = Depends(get_db), +): + """Transition a work order's status. + + PENDING → ACTIVE → COMPLETE + PENDING → CANCELLED + ACTIVE → CANCELLED + """ + wo = db.query(WorkOrder).filter(WorkOrder.id == work_order_id).first() + if not wo: + raise HTTPException(status_code=404, detail=f"Work order '{work_order_id}' not found") + + # Parse and validate new status + try: + new_status = WorkOrderStatus(body.status.upper()) + except ValueError: + raise HTTPException(status_code=422, detail=f"Unknown status '{body.status}'") + + allowed = _ALLOWED_TRANSITIONS.get(wo.status, set()) + if new_status not in allowed: + raise HTTPException( + status_code=422, + detail=f"Cannot transition from {wo.status.value} to {new_status.value}. " + f"Allowed: {[s.value for s in allowed] or 'none'}", + ) + + # Enforce one-ACTIVE-per-line + if new_status == WorkOrderStatus.ACTIVE: + conflict = ( + db.query(WorkOrder) + .filter( + WorkOrder.line_id == wo.line_id, + WorkOrder.status == WorkOrderStatus.ACTIVE, + WorkOrder.id != wo.id, + ) + .first() + ) + if conflict: + raise HTTPException( + status_code=409, + detail=f"Line already has an active work order: {conflict.order_number}", + ) + + # Apply transition + now = datetime.now(timezone.utc) + if new_status == WorkOrderStatus.ACTIVE and wo.actual_start is None: + wo.actual_start = now + elif new_status in (WorkOrderStatus.COMPLETE, WorkOrderStatus.CANCELLED): + wo.actual_end = now + + wo.status = new_status + db.commit() + db.refresh(wo) + logger.info("Work order %s → %s", wo.order_number, new_status.value) + return _to_response(wo) + + +# ── Helper ──────────────────────────────────────────────────────────────────── + +def _to_response(wo: WorkOrder) -> WorkOrderResponse: + return WorkOrderResponse( + id=str(wo.id), + order_number=wo.order_number, + product_id=str(wo.product_id), + line_id=str(wo.line_id), + target_qty=wo.target_qty, + good_qty=wo.good_qty, + status=wo.status.value, + scheduled_start=wo.scheduled_start, + actual_start=wo.actual_start, + actual_end=wo.actual_end, + notes=wo.notes, + created_at=wo.created_at, + ) diff --git a/services/mes/backend/services/cmms_client.py b/services/mes/backend/services/cmms_client.py new file mode 100644 index 0000000..c6a36fa --- /dev/null +++ b/services/mes/backend/services/cmms_client.py @@ -0,0 +1,194 @@ +"""CMMS adapter — bidirectional sync via GitHub Gist. + +Pushes MES work orders to GitHub Gists as portable CMMS documents +(Markdown + CSV). Any CMMS that can import CSV can consume these. + +Uses the same document schema as cmms/gist_work_order.py but calls +the GitHub API directly via httpx (no subprocess / gh CLI dependency). + +Controlled by settings.cmms_enabled: + False (default) — push_work_order() returns a synthetic response. + Safe for tests and environments without a GitHub token. + True — calls the real GitHub Gist API. Requires + settings.cmms_github_token (GitHub PAT, gist scope). + +CSV columns match cmms/gist_work_order.py CSV_COLUMNS for compatibility. +""" + +import csv +import io +import logging +from datetime import datetime, timezone +from typing import Optional + +import httpx + +from backend.config import settings + +logger = logging.getLogger(__name__) + +_GITHUB_GIST_API = "https://api.github.com/gists" + +# CSV schema — matches cmms/gist_work_order.py for cross-tool compat +_CSV_COLUMNS = [ + "work_order_id", "title", "status", "priority", "asset_name", + "asset_id", "location", "site", "assigned_to", "assigned_team", + "work_type", "category", "due_date", "created_date", "completed_date", + "completed_by", "reported_by", "channel", "estimated_hours", "cost", + "completion_notes", "failure_code", "description", "cmms_system", + "cmms_external_id", +] + + +# ── Formatting ──────────────────────────────────────────────────────────────── + +def format_work_order( + order_number: str, + status: str, + target_qty: int, + good_qty: int, + line_name: str, + line_isa95_path: str, + product_sku: str, + product_name: str, + notes: Optional[str] = None, + actual_start: Optional[datetime] = None, + actual_end: Optional[datetime] = None, + cmms_external_id: Optional[str] = None, +) -> dict: + """Map MES work order fields to CMMS Gist metadata dict (pure function).""" + title = f"{product_name} — {line_name}" + return { + "work_order_id": order_number, + "title": title, + "status": status.lower(), + "priority": "normal", + "asset_name": line_name, + "asset_id": line_isa95_path, + "location": line_isa95_path, + "site": "Lake Wales, FL", + "assigned_to": "", + "assigned_team": "Production", + "work_type": "Production", + "category": "Manufacturing", + "due_date": "", + "created_date": datetime.now(timezone.utc).isoformat(), + "completed_date": actual_end.isoformat() if actual_end else "", + "completed_by": "", + "reported_by": "MIRA MES", + "channel": "MES API", + "estimated_hours": str(round(target_qty / 3600, 2)), # rough estimate + "cost": "", + "completion_notes": notes or "", + "failure_code": "", + "description": ( + f"Production run: {target_qty} units of {product_sku} ({product_name}) " + f"on {line_name}. Good parts: {good_qty}." + ), + "cmms_system": "FactoryLM MES", + "cmms_external_id": cmms_external_id or "", + } + + +def _render_md(metadata: dict) -> str: + """Render minimal Markdown work order document.""" + return f"""# Work Order: {metadata['work_order_id']} + +**Title:** {metadata['title']} +**Status:** {metadata['status']} +**Priority:** {metadata['priority']} +**Asset:** {metadata['asset_name']} (`{metadata['asset_id']}`) +**Site:** {metadata['site']} +**Team:** {metadata['assigned_team']} +**Work Type:** {metadata['work_type']} +**Created:** {metadata['created_date']} +**Completed:** {metadata.get('completed_date') or '—'} +**Reported by:** {metadata['reported_by']} +**System:** {metadata['cmms_system']} + +## Description + +{metadata['description']} + +## Notes + +{metadata.get('completion_notes') or '—'} + +--- +*Generated by FactoryLM MES — {metadata['site']}* +""" + + +def _render_csv(metadata: dict) -> str: + """Render CSV with header + one data row.""" + buf = io.StringIO(newline="") + writer = csv.writer(buf, lineterminator="\n") + writer.writerow(_CSV_COLUMNS) + writer.writerow([metadata.get(col, "") for col in _CSV_COLUMNS]) + return buf.getvalue() + + +def _gist_files(metadata: dict) -> dict: + """Build the Gist files payload for the GitHub API.""" + return { + "work-order.md": {"content": _render_md(metadata)}, + "work-order.csv": {"content": _render_csv(metadata)}, + } + + +# ── GitHub Gist API ─────────────────────────────────────────────────────────── + +def push_work_order(metadata: dict, gist_id: Optional[str] = None) -> dict: + """Create or update a GitHub Gist for a CMMS work order. + + Args: + metadata: Output of format_work_order(). + gist_id: If provided, PATCH the existing Gist; otherwise POST a new one. + + Returns: + {"gist_id": str, "gist_url": str} + + When settings.cmms_enabled is False, returns a synthetic response and + makes no HTTP calls — safe for tests and non-CMMS environments. + """ + if not settings.cmms_enabled: + synthetic_id = f"mock-{metadata['work_order_id'].lower().replace('-', '')}" + logger.info("CMMS disabled — synthetic push for %s", metadata["work_order_id"]) + return { + "gist_id": gist_id or synthetic_id, + "gist_url": f"https://gist.github.com/{gist_id or synthetic_id}", + } + + if not settings.cmms_github_token: + raise RuntimeError("cmms_enabled=True but cmms_github_token is empty") + + headers = { + "Authorization": f"Bearer {settings.cmms_github_token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + files = _gist_files(metadata) + wo_id = metadata["work_order_id"] + description = f"[FactoryLM MES] {wo_id} — {metadata['title']}" + + with httpx.Client(timeout=10.0) as client: + if gist_id: + # Update existing Gist + resp = client.patch( + f"{_GITHUB_GIST_API}/{gist_id}", + headers=headers, + json={"description": description, "files": files}, + ) + else: + # Create new Gist + resp = client.post( + _GITHUB_GIST_API, + headers=headers, + json={"description": description, "files": files, "public": False}, + ) + + resp.raise_for_status() + data = resp.json() + result = {"gist_id": data["id"], "gist_url": data["html_url"]} + logger.info("CMMS Gist %s: %s", "updated" if gist_id else "created", result["gist_url"]) + return result diff --git a/services/mes/backend/services/downtime_classifier.py b/services/mes/backend/services/downtime_classifier.py new file mode 100644 index 0000000..b80d4cc --- /dev/null +++ b/services/mes/backend/services/downtime_classifier.py @@ -0,0 +1,90 @@ +"""Downtime reason classifier — maps free-text descriptions to reason codes. + +Pure function, no I/O, no LLM required. +Used when MIRA or an operator submits a plain-English description of why +a line is down (e.g. "the conveyor is jammed" → JAM). + +Keyword priority table (first match wins — order matters): + E_STOP estop, e-stop, emergency stop + MAINT_PM pm, preventive, scheduled maint, planned maint + MAINT_BREAKDOWN breakdown, broken, failed, fault (generic) + CHANGEOVER_TOOLING tooling, tool change + CHANGEOVER_PRODUCT changeover, product change, switchover, new run + JAM jam, jammed, stuck, snagged, wedged + STARVED_MATERIAL starved, no material, empty, feed, material + BLOCKED_DOWNSTREAM blocked, downstream, full, backing up + QUALITY_HOLD quality, hold, inspection, reject, rework + OVERLOAD overload, overcurrent + OVERHEAT overheat, hot, thermal, temperature + SENSOR_FAIL sensor, proximity, photoelectric, switch + COMMS_FAIL comms, communication, timeout, network, modbus + UNKNOWN (fallback) + +Returns: + (reason_code: str, confidence: str) + confidence = "high" if keyword found, "low" if fallback to UNKNOWN. +""" + +import re +from typing import Tuple + +# ── Keyword map (ordered — first match wins) ────────────────────────────────── + +_RULES = [ + ("E_STOP", ["estop", "e-stop", "e stop", "emergency stop"]), + ("MAINT_PM", ["preventive maint", "planned maint", "scheduled maint", + r"\bpm\b"]), + ("MAINT_BREAKDOWN", ["breakdown", "broke down", "broken", "out of service"]), + ("CHANGEOVER_TOOLING", ["tooling change", "tool change", "new tooling", "tooling"]), + ("CHANGEOVER_PRODUCT", ["changeover", "product change", "switchover", "new run", + "new product"]), + ("JAM", ["jammed", "jam", "stuck", "snagged", "wedged", "tangled"]), + ("STARVED_MATERIAL", ["starved", "no material", "out of material", "empty", + "no stock", "feed empty"]), + ("BLOCKED_DOWNSTREAM", ["blocked", "downstream", "backing up", "full", "backlog"]), + ("QUALITY_HOLD", ["quality hold", "quality issue", "hold", "inspection", + "reject", "rework", "scrap"]), + ("OVERLOAD", ["overload", "overcurrent", "tripped"]), + ("OVERHEAT", ["overheat", "overheating", "hot", "thermal", "temperature"]), + ("SENSOR_FAIL", ["sensor failed", "sensor failure", "sensor fault", + "sensor", "proximity", "photoelectric", "limit switch", + "detector"]), + ("COMMS_FAIL", ["comms", "communication", "timeout", "network", "modbus", + "lost connection"]), +] + + +def classify_reason(text: str) -> Tuple[str, str]: + """Classify free-text downtime description into a reason code. + + Args: + text: Free-text description from an operator or MIRA chat. + + Returns: + (reason_code, confidence) — confidence is "high" or "low". + Always returns a valid reason_code string (fallback: "UNKNOWN"). + + Examples: + >>> classify_reason("the conveyor is jammed") + ('JAM', 'high') + >>> classify_reason("scheduled PM window") + ('MAINT_PM', 'high') + >>> classify_reason("some weird thing happened") + ('UNKNOWN', 'low') + """ + if not text or not text.strip(): + return ("UNKNOWN", "low") + + normalised = text.lower().strip() + + for reason_code, patterns in _RULES: + for pattern in patterns: + # Treat pattern as regex if it starts with \b, else literal substring + if pattern.startswith(r"\b") or pattern.startswith("("): + if re.search(pattern, normalised): + return (reason_code, "high") + else: + if pattern in normalised: + return (reason_code, "high") + + return ("UNKNOWN", "low") diff --git a/services/mes/backend/services/oee_calculator.py b/services/mes/backend/services/oee_calculator.py new file mode 100644 index 0000000..4b21358 --- /dev/null +++ b/services/mes/backend/services/oee_calculator.py @@ -0,0 +1,334 @@ +"""OEE Calculator — 60-second tick per active line. + +Computes Availability, Performance, Quality, OEE, and TEEP from: + - ItemCount delta (HR100) via plc-modbus HTTP + - RUNNING-state duration from machine_states table + - Active work order's ideal_cycle_sec (from products table, default 1.0) + - Schedule utilisation (from schedules table — Week 4) + +Writes one oee_snapshots row per line per tick. +Raises an in-process alert when a line's OEE < 0.60 for 30+ consecutive +minutes (30 ticks at 60s each). + +Formula: + Availability = run_time_sec / planned_time_sec (clamped 0-1) + Performance = (ideal_cycle_sec * total_count) / run_time_sec (clamped 0-1) + Quality = good_count / total_count (clamped 0-1) + OEE = A * P * Q + TEEP = OEE * utilisation + Utilisation = 1.0 if line is within a scheduled shift, else 0.0 + Falls back to 1.0 when no schedules exist (Week 3 compat) +""" + +import asyncio +import logging +from datetime import datetime, timedelta, timezone +from typing import Optional + +from sqlalchemy import text +from sqlalchemy.orm import Session + +from backend.config import settings +from backend.database import SessionLocal +from backend.models.db_models import Line, MachineStateEnum, OEESnapshot +from backend.services import state_poller +from backend.services.plc_client import PLCOfflineError, fetch_io, item_count + +logger = logging.getLogger(__name__) + +# ── Constants ───────────────────────────────────────────────────────────────── + +TICK_SEC = 60 # OEE snapshot interval +OEE_ALERT_THRESHOLD = 0.60 # alert below this +OEE_ALERT_TICKS = 30 # consecutive ticks below threshold before alert (30 min) +DEFAULT_IDEAL_CYCLE_SEC = 1.0 + +# ── In-memory state ─────────────────────────────────────────────────────────── + +# {line_id: int} — ItemCount at previous tick (cumulative register) +_last_count: dict = {} + +# {line_id: int} — consecutive ticks below OEE_ALERT_THRESHOLD +_low_oee_ticks: dict = {} + +_stop_event: Optional[asyncio.Event] = None + + +# ── OEE math (pure functions) ───────────────────────────────────────────────── + +def _clamp(value: float, lo: float = 0.0, hi: float = 1.0) -> float: + return max(lo, min(hi, value)) + + +def compute_oee( + run_time_sec: int, + planned_time_sec: int, + total_count: int, + good_count: int, + ideal_cycle_sec: float, + utilisation: float = 1.0, +) -> dict: + """Return dict with availability, performance, quality, oee, teep. + + All outputs clamped to [0.0, 1.0]. + Division-by-zero safe — returns 0.0 on any degenerate input. + + Args: + utilisation: Fraction of this period covered by a shift schedule. + 1.0 = fully scheduled (or no schedule defined). + 0.0 = outside any schedule. + TEEP = OEE * utilisation. + """ + if planned_time_sec <= 0: + return dict(availability=0.0, performance=0.0, quality=0.0, oee=0.0, teep=0.0) + + availability = _clamp(run_time_sec / planned_time_sec) + + if run_time_sec > 0 and total_count > 0: + performance = _clamp((ideal_cycle_sec * total_count) / run_time_sec) + else: + performance = 0.0 + + quality = _clamp(good_count / total_count) if total_count > 0 else 0.0 + + oee = availability * performance * quality + teep = _clamp(oee * _clamp(utilisation)) + + return dict( + availability=round(availability, 4), + performance=round(performance, 4), + quality=round(quality, 4), + oee=round(oee, 4), + teep=round(teep, 4), + ) + + +# ── DB helpers ──────────────────────────────────────────────────────────────── + +def _run_time_in_window(db: Session, line_id: str, window_sec: int) -> int: + """Sum seconds spent in RUNNING state in the last window_sec seconds.""" + cutoff = datetime.now(timezone.utc) - timedelta(seconds=window_sec) + rows = db.execute( + text(""" + SELECT + GREATEST(started_at, :cutoff) AS seg_start, + COALESCE(ended_at, NOW()) AS seg_end + FROM machine_states + WHERE line_id = :lid + AND state = 'RUNNING' + AND COALESCE(ended_at, NOW()) >= :cutoff + """), + {"lid": line_id, "cutoff": cutoff}, + ).fetchall() + total = 0 + for row in rows: + seg_start, seg_end = row[0], row[1] + if seg_end > seg_start: + total += int((seg_end - seg_start).total_seconds()) + return min(total, window_sec) + + +def _active_ideal_cycle(db: Session, line_id: str) -> float: + """Return ideal_cycle_sec from the active work order's product, or default.""" + row = db.execute( + text(""" + SELECT p.ideal_cycle_sec + FROM work_orders wo + JOIN products p ON p.id = wo.product_id + WHERE wo.line_id = :lid AND wo.status = 'ACTIVE' + LIMIT 1 + """), + {"lid": line_id}, + ).fetchone() + return float(row[0]) if row else DEFAULT_IDEAL_CYCLE_SEC + + +def _active_utilisation(db: Session, line_id: str) -> float: + """Return schedule utilisation for the current tick window. + + - If no schedules exist for this line at all → 1.0 (Week 3 compat). + - If schedules exist but none cover NOW → 0.0 (outside scheduled time). + - If a schedule covers NOW → 1.0 (line is within its shift). + """ + # Check whether any schedules exist for this line + has_schedules = db.execute( + text("SELECT 1 FROM schedules WHERE line_id = :lid LIMIT 1"), + {"lid": line_id}, + ).fetchone() + if not has_schedules: + return 1.0 # no schedule defined — preserve Week 3 behaviour + + # Check if NOW falls inside an active schedule entry + in_schedule = db.execute( + text(""" + SELECT 1 FROM schedules + WHERE line_id = :lid + AND planned_start <= NOW() + AND planned_end >= NOW() + LIMIT 1 + """), + {"lid": line_id}, + ).fetchone() + return 1.0 if in_schedule else 0.0 + + +def _active_work_order_id(db: Session, line_id: str) -> Optional[str]: + row = db.execute( + text("SELECT id FROM work_orders WHERE line_id=:lid AND status='ACTIVE' LIMIT 1"), + {"lid": line_id}, + ).fetchone() + return str(row[0]) if row else None + + +def _write_snapshot(db: Session, line_id: str, work_order_id: Optional[str], + run_time_sec: int, planned_time_sec: int, + total_count: int, good_count: int, + ideal_cycle_sec: float, metrics: dict) -> None: + snap = OEESnapshot( + line_id=line_id, + work_order_id=work_order_id, + ts=datetime.now(timezone.utc), + run_time_sec=run_time_sec, + planned_time_sec=planned_time_sec, + total_count=total_count, + good_count=good_count, + ideal_cycle_sec=ideal_cycle_sec, + availability=metrics["availability"], + performance=metrics["performance"], + quality=metrics["quality"], + oee=metrics["oee"], + teep=metrics["teep"], + ) + db.add(snap) + + +# ── Alert logic ─────────────────────────────────────────────────────────────── + +def _check_alert(line_name: str, line_id: str, oee: float) -> None: + if oee < OEE_ALERT_THRESHOLD: + _low_oee_ticks[line_id] = _low_oee_ticks.get(line_id, 0) + 1 + if _low_oee_ticks[line_id] == OEE_ALERT_TICKS: + logger.warning( + "ALERT Line %-12s OEE %.1f%% below %.0f%% for %d consecutive minutes", + line_name, oee * 100, OEE_ALERT_THRESHOLD * 100, OEE_ALERT_TICKS, + ) + else: + _low_oee_ticks[line_id] = 0 + + +# ── Per-line tick ───────────────────────────────────────────────────────────── + +async def _tick_line(line: Line) -> None: + """Compute and persist one OEE snapshot for a single line.""" + line_id = str(line.id) + + # Skip if OFFLINE — no meaningful OEE + current_state = state_poller.get_cached_state(line_id) + is_offline = (current_state == MachineStateEnum.OFFLINE or current_state is None) + planned_time_sec = 0 if is_offline else TICK_SEC + + # Fetch current item count (non-blocking) + total_count = 0 + try: + io_data = await fetch_io(settings.plc_modbus_url) + current_count = item_count(io_data) + prev = _last_count.get(line_id) + if prev is None: + # First tick — seed without counting; no items to credit yet + _last_count[line_id] = current_count + logger.debug("Line %s — seeding count at %d", line.name, current_count) + return + delta = max(0, current_count - prev) + _last_count[line_id] = current_count + total_count = delta + except PLCOfflineError: + # PLC unreachable — keep count at 0 for this tick + pass + + good_count = total_count # no reject tracking until Week 5 + + # DB work in thread + def _db_work(): + db = SessionLocal() + try: + run_time_sec = _run_time_in_window(db, line_id, TICK_SEC) + ideal_cycle_sec = _active_ideal_cycle(db, line_id) + wo_id = _active_work_order_id(db, line_id) + utilisation = _active_utilisation(db, line_id) + + metrics = compute_oee( + run_time_sec=run_time_sec, + planned_time_sec=planned_time_sec, + total_count=total_count, + good_count=good_count, + ideal_cycle_sec=ideal_cycle_sec, + utilisation=utilisation, + ) + _write_snapshot(db, line_id, wo_id, run_time_sec, planned_time_sec, + total_count, good_count, ideal_cycle_sec, metrics) + db.commit() + return metrics, ideal_cycle_sec, run_time_sec + except Exception: + db.rollback() + logger.exception("OEE snapshot write failed for line %s", line.name) + return None, DEFAULT_IDEAL_CYCLE_SEC, 0 + finally: + db.close() + + result = await asyncio.to_thread(_db_work) + metrics, ideal_cycle_sec, run_time_sec = result + if metrics: + _check_alert(line.name, line_id, metrics["oee"]) + logger.info( + "OEE %-12s A=%.2f P=%.2f Q=%.2f OEE=%.2f " + "run=%ds count=%d cycle=%.1fs", + line.name, + metrics["availability"], metrics["performance"], + metrics["quality"], metrics["oee"], + run_time_sec, total_count, ideal_cycle_sec, + ) + + +# ── Tick loop ───────────────────────────────────────────────────────────────── + +async def run(tick_sec: int = TICK_SEC) -> None: + """Background OEE calculator — one tick per line every tick_sec seconds.""" + global _stop_event + _stop_event = asyncio.Event() + logger.info("OEE calculator starting (tick=%ds, threshold=%.0f%%)", + tick_sec, OEE_ALERT_THRESHOLD * 100) + + tick = 0 + lines: list = [] + + while not _stop_event.is_set(): + if tick % 60 == 0: + db = SessionLocal() + try: + lines = db.query(Line).all() + finally: + db.close() + + if lines: + await asyncio.gather( + *[_tick_line(line) for line in lines], + return_exceptions=True, + ) + + tick += 1 + try: + await asyncio.wait_for(_stop_event.wait(), timeout=tick_sec) + except asyncio.TimeoutError: + pass + + +def stop() -> None: + if _stop_event: + _stop_event.set() + + +# ── Public read API (used by routes) ───────────────────────────────────────── + +def get_low_oee_ticks(line_id: str) -> int: + """How many consecutive ticks has this line been below threshold.""" + return _low_oee_ticks.get(line_id, 0) diff --git a/services/mes/tests/test_cmms.py b/services/mes/tests/test_cmms.py new file mode 100644 index 0000000..f75776f --- /dev/null +++ b/services/mes/tests/test_cmms.py @@ -0,0 +1,299 @@ +"""Week 6 tests — Atlas CMMS bidirectional sync. + +All tests use unittest.mock — no live DB, no GitHub API calls. +Run: pytest tests/test_cmms.py -v + +Acceptance Criteria (PRD-MES-CORE.md §10): + AC#9 Work order created in CMMS appears in /api/mes/work-orders (ingest) + AC#10 Work order created via API appears in CMMS (sync push) +""" + +import uuid +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from backend.main import app +from backend.models.db_models import Line, Product, WorkOrder, WorkOrderStatus +from backend.services.cmms_client import format_work_order + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +LINE_ID = str(uuid.uuid4()) +PRODUCT_ID = str(uuid.uuid4()) +WO_ID = str(uuid.uuid4()) +NOW = datetime.now(timezone.utc) + + +def _make_line(name: str = "Conveyor-1") -> Line: + line = MagicMock(spec=Line) + line.id = uuid.UUID(LINE_ID) + line.name = name + line.isa95_path = "lakewales/floor/conveyor-1" + line.description = "Main conveyor" + return line + + +def _make_product(sku: str = "SKU-001") -> Product: + p = MagicMock(spec=Product) + p.id = uuid.UUID(PRODUCT_ID) + p.sku = sku + p.name = "Widget A" + p.ideal_cycle_sec = 2.0 + return p + + +def _make_wo( + status: WorkOrderStatus = WorkOrderStatus.PENDING, + cmms_ref: str = None, +) -> WorkOrder: + wo = MagicMock(spec=WorkOrder) + wo.id = uuid.UUID(WO_ID) + wo.order_number = "WO-001" + wo.product_id = uuid.UUID(PRODUCT_ID) + wo.line_id = uuid.UUID(LINE_ID) + wo.target_qty = 100 + wo.good_qty = 0 + wo.status = status + wo.scheduled_start = None + wo.actual_start = None + wo.actual_end = None + wo.notes = None + wo.cmms_ref = cmms_ref + wo.cmms_synced_at = NOW if cmms_ref else None + wo.created_at = NOW + return wo + + +@pytest.fixture() +def client(): + from backend.database import get_db + mock_db = MagicMock() + app.dependency_overrides[get_db] = lambda: mock_db + with TestClient(app, raise_server_exceptions=True) as c: + yield c, mock_db + app.dependency_overrides.clear() + + +# ── format_work_order (pure function) ───────────────────────────────────────── + +class TestFormatWorkOrder: + def test_maps_all_required_fields(self): + meta = format_work_order( + order_number="WO-001", + status="ACTIVE", + target_qty=100, + good_qty=80, + line_name="Conveyor-1", + line_isa95_path="lakewales/floor/conveyor-1", + product_sku="SKU-001", + product_name="Widget A", + ) + assert meta["work_order_id"] == "WO-001" + assert meta["status"] == "active" + assert meta["asset_name"] == "Conveyor-1" + assert meta["cmms_system"] == "FactoryLM MES" + assert "Widget A" in meta["title"] + assert "Conveyor-1" in meta["title"] + + def test_description_includes_qty(self): + meta = format_work_order( + order_number="WO-002", status="PENDING", + target_qty=200, good_qty=0, + line_name="Sorting-1", line_isa95_path="lakewales/floor/sorting-1", + product_sku="SKU-002", product_name="Bolt B", + ) + assert "200" in meta["description"] + assert "SKU-002" in meta["description"] + + def test_notes_in_completion_notes(self): + meta = format_work_order( + order_number="WO-003", status="COMPLETE", + target_qty=50, good_qty=50, + line_name="L1", line_isa95_path="lw/floor/l1", + product_sku="SKU-003", product_name="Part C", + notes="All good, no issues", + ) + assert meta["completion_notes"] == "All good, no issues" + + def test_completed_date_populated_when_actual_end_given(self): + end = datetime(2026, 4, 16, 12, 0, 0, tzinfo=timezone.utc) + meta = format_work_order( + order_number="WO-004", status="COMPLETE", + target_qty=50, good_qty=50, + line_name="L1", line_isa95_path="lw/floor/l1", + product_sku="SKU-004", product_name="Part D", + actual_end=end, + ) + assert "2026-04-16" in meta["completed_date"] + + def test_empty_completed_date_when_no_actual_end(self): + meta = format_work_order( + order_number="WO-005", status="ACTIVE", + target_qty=50, good_qty=0, + line_name="L1", line_isa95_path="lw/floor/l1", + product_sku="SKU-005", product_name="Part E", + ) + assert meta["completed_date"] == "" + + +# ── push_work_order: disabled mode (no HTTP) ────────────────────────────────── + +class TestPushWorkOrderDisabled: + def test_returns_synthetic_gist_when_disabled(self): + """cmms_enabled=False → synthetic response, no HTTP call.""" + from backend.services.cmms_client import push_work_order + meta = format_work_order( + order_number="WO-100", status="PENDING", + target_qty=10, good_qty=0, + line_name="L1", line_isa95_path="lw/floor/l1", + product_sku="SKU-100", product_name="Thing", + ) + result = push_work_order(meta) # cmms_enabled=False by default in test env + assert "gist_id" in result + assert "gist_url" in result + assert "gist.github.com" in result["gist_url"] + + def test_uses_existing_gist_id_in_synthetic_response(self): + from backend.services.cmms_client import push_work_order + meta = format_work_order( + order_number="WO-101", status="ACTIVE", + target_qty=10, good_qty=5, + line_name="L1", line_isa95_path="lw/floor/l1", + product_sku="SKU-101", product_name="Thing", + ) + result = push_work_order(meta, gist_id="existing123") + assert result["gist_id"] == "existing123" + + +# ── CMMS API endpoints ──────────────────────────────────────────────────────── + +class TestSyncEndpoint: + def test_sync_returns_200_with_gist_url(self, client): + tc, db = client + wo = _make_wo() + line = _make_line() + product = _make_product() + db.query.return_value.filter.return_value.first.side_effect = [ + wo, line, product, + ] + db.refresh.side_effect = lambda obj: None + + resp = tc.post(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 200 + data = resp.json() + assert "gist_id" in data + assert "gist_url" in data + assert data["order_number"] == "WO-001" + + def test_sync_unknown_wo_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None + resp = tc.post(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 404 + + def test_sync_updates_existing_gist(self, client): + """If cmms_ref already set, push uses the existing Gist ID.""" + tc, db = client + wo = _make_wo(cmms_ref="existing-gist-abc") + db.query.return_value.filter.return_value.first.side_effect = [ + wo, _make_line(), _make_product(), + ] + db.refresh.side_effect = lambda obj: None + + resp = tc.post(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 200 + # Should carry through the existing gist_id in synthetic mode + assert resp.json()["gist_id"] == "existing-gist-abc" + + +class TestSyncStatusEndpoint: + def test_unsynced_wo_returns_synced_false(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_wo() + resp = tc.get(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 200 + assert resp.json()["synced"] is False + assert resp.json()["cmms_ref"] is None + + def test_synced_wo_returns_synced_true(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_wo( + cmms_ref="abc123" + ) + resp = tc.get(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 200 + assert resp.json()["synced"] is True + assert resp.json()["cmms_ref"] == "abc123" + + def test_unknown_wo_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None + resp = tc.get(f"/api/mes/cmms/sync/{WO_ID}") + assert resp.status_code == 404 + + +class TestIngestEndpoint: + def test_ingest_creates_wo_from_cmms(self, client): + tc, db = client + wo = _make_wo() + db.query.return_value.filter.return_value.first.side_effect = [ + None, # order_number unique check (not exists) + _make_line(), # line by name + _make_product(), # product by sku + ] + db.refresh.side_effect = lambda obj: None + + with patch("backend.routes.cmms.WorkOrder", return_value=wo): + resp = tc.post("/api/mes/cmms/ingest", json={ + "order_number": "WO-CMMS-001", + "product_sku": "SKU-001", + "line_name": "Conveyor-1", + "target_qty": 100, + "cmms_ref": "gist-abc123", + }) + assert resp.status_code == 201 + + def test_ingest_idempotent_on_duplicate_order_number(self, client): + tc, db = client + existing_wo = _make_wo() + db.query.return_value.filter.return_value.first.return_value = existing_wo + resp = tc.post("/api/mes/cmms/ingest", json={ + "order_number": "WO-001", + "product_sku": "SKU-001", + "line_name": "Conveyor-1", + "target_qty": 100, + }) + # Should return 201 with the existing WO, not error + assert resp.status_code == 201 + + def test_ingest_unknown_line_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.side_effect = [ + None, # order_number not exists + None, # line not found + ] + resp = tc.post("/api/mes/cmms/ingest", json={ + "order_number": "WO-999", + "product_sku": "SKU-001", + "line_name": "NonExistentLine", + "target_qty": 50, + }) + assert resp.status_code == 404 + + def test_ingest_unknown_product_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.side_effect = [ + None, # order_number not exists + _make_line(), # line found + None, # product not found + ] + resp = tc.post("/api/mes/cmms/ingest", json={ + "order_number": "WO-998", + "product_sku": "SKU-GHOST", + "line_name": "Conveyor-1", + "target_qty": 50, + }) + assert resp.status_code == 404 diff --git a/services/mes/tests/test_downtime.py b/services/mes/tests/test_downtime.py new file mode 100644 index 0000000..6b76bd7 --- /dev/null +++ b/services/mes/tests/test_downtime.py @@ -0,0 +1,290 @@ +"""Week 5 tests — Downtime tracking: NLP classifier + API endpoints. + +All tests use unittest.mock — no live DB or plc-modbus required. +Run: pytest tests/test_downtime.py -v + +Acceptance Criteria (PRD-MES-CORE.md §10): + AC#4 Downtime event captured within 10s of fault (covered by state_poller) + AC#8 NLP classifier maps common phrases to correct reason codes + AC#9 Manual reason entry updates open DOWN event +""" + +import uuid +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from backend.main import app +from backend.models.db_models import ( + DowntimeReason, + DowntimeCategory, + EnteredBy, + Line, + MachineState, + MachineStateEnum, +) +from backend.services.downtime_classifier import classify_reason + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +LINE_ID = str(uuid.uuid4()) +EVENT_ID = str(uuid.uuid4()) +NOW = datetime.now(timezone.utc) + + +def _make_line(): + line = MagicMock(spec=Line) + line.id = uuid.UUID(LINE_ID) + line.name = "Conveyor-1" + return line + + +def _make_reason(code: str, description: str, category: str = "UNPLANNED") -> DowntimeReason: + r = MagicMock(spec=DowntimeReason) + r.code = code + r.description = description + r.category = DowntimeCategory(category) + return r + + +def _make_event( + state: MachineStateEnum = MachineStateEnum.DOWN, + reason_code: str = None, + ended_at=None, +) -> MachineState: + e = MagicMock(spec=MachineState) + e.id = uuid.UUID(EVENT_ID) + e.line_id = uuid.UUID(LINE_ID) + e.state = state + e.reason_code = reason_code + e.entered_by = EnteredBy.PLC + e.notes = None + e.started_at = NOW + e.ended_at = ended_at + return e + + +# ── NLP Classifier ──────────────────────────────────────────────────────────── + +class TestClassifyReason: + # High-confidence hits + def test_jam(self): + code, conf = classify_reason("the conveyor is jammed") + assert code == "JAM" and conf == "high" + + def test_estop(self): + code, conf = classify_reason("emergency stop activated") + assert code == "E_STOP" and conf == "high" + + def test_estop_hyphen(self): + code, conf = classify_reason("e-stop was hit") + assert code == "E_STOP" and conf == "high" + + def test_pm(self): + code, conf = classify_reason("scheduled PM window") + assert code == "MAINT_PM" and conf == "high" + + def test_preventive(self): + code, conf = classify_reason("preventive maintenance") + assert code == "MAINT_PM" and conf == "high" + + def test_breakdown(self): + code, conf = classify_reason("machine broke down unexpectedly") + assert code == "MAINT_BREAKDOWN" and conf == "high" + + def test_tooling(self): + code, conf = classify_reason("tooling change on line 1") + assert code == "CHANGEOVER_TOOLING" and conf == "high" + + def test_changeover(self): + code, conf = classify_reason("product changeover to part B") + assert code == "CHANGEOVER_PRODUCT" and conf == "high" + + def test_starved(self): + code, conf = classify_reason("line is starved, no material") + assert code == "STARVED_MATERIAL" and conf == "high" + + def test_blocked(self): + code, conf = classify_reason("downstream is blocked") + assert code == "BLOCKED_DOWNSTREAM" and conf == "high" + + def test_quality_hold(self): + code, conf = classify_reason("quality hold for inspection") + assert code == "QUALITY_HOLD" and conf == "high" + + def test_overload(self): + code, conf = classify_reason("motor overload tripped") + assert code == "OVERLOAD" and conf == "high" + + def test_overheat(self): + code, conf = classify_reason("drive is overheating") + assert code == "OVERHEAT" and conf == "high" + + def test_sensor(self): + code, conf = classify_reason("proximity sensor failed") + assert code == "SENSOR_FAIL" and conf == "high" + + def test_comms(self): + code, conf = classify_reason("modbus timeout — comms lost") + assert code == "COMMS_FAIL" and conf == "high" + + # Fallback + def test_unknown_fallback(self): + code, conf = classify_reason("something weird happened on the floor") + assert code == "UNKNOWN" and conf == "low" + + def test_empty_string_fallback(self): + code, conf = classify_reason("") + assert code == "UNKNOWN" and conf == "low" + + def test_none_safe(self): + code, conf = classify_reason(None) + assert code == "UNKNOWN" and conf == "low" + + # Priority: E_STOP beats breakdown + def test_estop_priority_over_breakdown(self): + code, conf = classify_reason("e-stop triggered, motor fault") + assert code == "E_STOP" and conf == "high" + + # Case insensitive + def test_case_insensitive(self): + code, conf = classify_reason("JAM IN THE FEEDER") + assert code == "JAM" and conf == "high" + + +# ── Downtime API (via TestClient + mocked DB) ───────────────────────────────── + +@pytest.fixture() +def client(): + from backend.database import get_db + mock_db = MagicMock() + app.dependency_overrides[get_db] = lambda: mock_db + with TestClient(app, raise_server_exceptions=True) as c: + yield c, mock_db + app.dependency_overrides.clear() + + +class TestListDowntimeReasons: + def test_returns_200_list(self, client): + tc, db = client + db.query.return_value.order_by.return_value.all.return_value = [ + _make_reason("JAM", "Conveyor or mechanism jam"), + _make_reason("MAINT_PM", "Planned preventive maintenance", "PLANNED"), + ] + resp = tc.get("/api/mes/downtime-reasons") + assert resp.status_code == 200 + assert len(resp.json()) == 2 + + +class TestGetLineDowntime: + def test_returns_200_list(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_line() + db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ + _make_event(MachineStateEnum.DOWN, "JAM"), + ] + # second query for DowntimeReason join + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), + _make_reason("JAM", "Conveyor or mechanism jam"), + ] + resp = tc.get(f"/api/mes/lines/{LINE_ID}/downtime") + assert resp.status_code == 200 + + def test_unknown_line_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None + resp = tc.get(f"/api/mes/lines/{LINE_ID}/downtime") + assert resp.status_code == 404 + + +class TestEnterDowntimeReason: + def test_direct_reason_code_succeeds(self, client): + tc, db = client + event = _make_event(MachineStateEnum.DOWN, None) + reason = _make_reason("JAM", "Conveyor or mechanism jam") + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), # line lookup + event, # open DOWN event + reason, # validate reason_code + reason, # final reason join + ] + db.query.return_value.filter.return_value.order_by.return_value.first.return_value = event + db.refresh.side_effect = lambda obj: None + + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "reason_code": "JAM", + "entered_by": "OPERATOR", + }) + assert resp.status_code == 200 + assert resp.json()["event"]["reason_code"] == "JAM" + + def test_nlp_description_classifies_and_succeeds(self, client): + tc, db = client + event = _make_event(MachineStateEnum.DOWN, None) + reason = _make_reason("JAM", "Conveyor or mechanism jam") + # Line lookup → filter().first(); open event → order_by().first() + # NLP path: no reason validation query; final reason join → filter().first() + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), + reason, # final reason join after NLP classify + ] + db.query.return_value.filter.return_value.order_by.return_value.first.return_value = event + db.refresh.side_effect = lambda obj: None + + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "description": "the conveyor is jammed", + "entered_by": "MIRA_AI", + }) + assert resp.status_code == 200 + data = resp.json() + assert data["classified"] is True + assert data["confidence"] == "high" + + def test_no_open_event_returns_409(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_line() + db.query.return_value.filter.return_value.order_by.return_value.first.return_value = None + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "reason_code": "JAM", + "entered_by": "OPERATOR", + }) + assert resp.status_code == 409 + + def test_unknown_reason_code_returns_422(self, client): + tc, db = client + event = _make_event(MachineStateEnum.DOWN, None) + # Line → filter().first(); open event → order_by().first(); reason lookup → filter().first() = None + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), + None, # reason_code not found in downtime_reasons + ] + db.query.return_value.filter.return_value.order_by.return_value.first.return_value = event + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "reason_code": "MADE_UP_CODE", + "entered_by": "OPERATOR", + }) + assert resp.status_code == 422 + + def test_missing_both_fields_returns_422(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), + _make_event(MachineStateEnum.DOWN, None), + ] + db.query.return_value.filter.return_value.order_by.return_value.first.return_value = _make_event() + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "entered_by": "OPERATOR", + }) + assert resp.status_code == 422 + + def test_unknown_line_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None + resp = tc.post(f"/api/mes/lines/{LINE_ID}/downtime", json={ + "reason_code": "JAM", + "entered_by": "OPERATOR", + }) + assert resp.status_code == 404 diff --git a/services/mes/tests/test_oee.py b/services/mes/tests/test_oee.py new file mode 100644 index 0000000..b78bfd5 --- /dev/null +++ b/services/mes/tests/test_oee.py @@ -0,0 +1,183 @@ +"""Week 3 tests — OEE computation and calculator logic. + +All tests use unittest.mock — no live DB or plc-modbus required. +Run: pytest tests/test_oee.py -v + +Acceptance Criteria (PRD-MES-CORE.md §10): + AC#3 OEE calculates correctly (known inputs → expected output ± 0.01) + AC#5 TEEP reported alongside OEE +""" + +import pytest +from backend.services.oee_calculator import ( + OEE_ALERT_TICKS, + OEE_ALERT_THRESHOLD, + _check_alert, + _low_oee_ticks, + compute_oee, +) + + +# ── compute_oee pure function ───────────────────────────────────────────────── + +class TestComputeOEE: + def test_perfect_oee(self): + """100% on all three components → OEE = 1.0""" + m = compute_oee( + run_time_sec=60, + planned_time_sec=60, + total_count=60, + good_count=60, + ideal_cycle_sec=1.0, + ) + assert m["availability"] == pytest.approx(1.0, abs=0.01) + assert m["performance"] == pytest.approx(1.0, abs=0.01) + assert m["quality"] == pytest.approx(1.0, abs=0.01) + assert m["oee"] == pytest.approx(1.0, abs=0.01) + + def test_typical_factory_oee(self): + """Walker Reynolds benchmark: typical factory ~55% OEE. + + Inputs: + planned=60s, run=48s → A=0.80 + count=40 @ 1s/part, run=48s → P=40/48=0.833 + good=38/40 → Q=0.95 + OEE = 0.80 × 0.833 × 0.95 ≈ 0.633 + """ + m = compute_oee( + run_time_sec=48, + planned_time_sec=60, + total_count=40, + good_count=38, + ideal_cycle_sec=1.0, + ) + assert m["availability"] == pytest.approx(0.80, abs=0.01) + assert m["performance"] == pytest.approx(0.833, abs=0.01) + assert m["quality"] == pytest.approx(0.95, abs=0.01) + assert m["oee"] == pytest.approx(0.633, abs=0.01) + assert "teep" in m + + def test_zero_planned_time_returns_zeros(self): + """OFFLINE line — planned_time=0 → all zeros, no division error.""" + m = compute_oee( + run_time_sec=0, + planned_time_sec=0, + total_count=0, + good_count=0, + ideal_cycle_sec=1.0, + ) + assert m["oee"] == 0.0 + assert m["availability"] == 0.0 + + def test_zero_count_returns_zero_performance_and_quality(self): + """Line running but no parts produced — P and Q are 0.""" + m = compute_oee( + run_time_sec=60, + planned_time_sec=60, + total_count=0, + good_count=0, + ideal_cycle_sec=1.0, + ) + assert m["availability"] == pytest.approx(1.0, abs=0.01) + assert m["performance"] == 0.0 + assert m["quality"] == 0.0 + assert m["oee"] == 0.0 + + def test_performance_clamped_to_1(self): + """Items produced faster than ideal → Performance capped at 1.0.""" + m = compute_oee( + run_time_sec=60, + planned_time_sec=60, + total_count=120, # twice the ideal rate + good_count=120, + ideal_cycle_sec=1.0, + ) + assert m["performance"] == pytest.approx(1.0, abs=0.01) + assert m["oee"] == pytest.approx(1.0, abs=0.01) + + def test_availability_clamped_to_1(self): + """run_time > planned_time (clock skew) → clamped to 1.0.""" + m = compute_oee( + run_time_sec=65, + planned_time_sec=60, + total_count=60, + good_count=60, + ideal_cycle_sec=1.0, + ) + assert m["availability"] == pytest.approx(1.0, abs=0.01) + + def test_teep_equals_oee_without_schedule(self): + """Until schedules are wired (Week 4), TEEP == OEE.""" + m = compute_oee(60, 60, 60, 60, 1.0) + assert m["teep"] == pytest.approx(m["oee"], abs=0.001) + + def test_slow_cycle_reduces_performance(self): + """Ideal=1s, actual=2s/part → P=0.5""" + m = compute_oee( + run_time_sec=60, + planned_time_sec=60, + total_count=30, # 30 parts in 60s = 2s/part + good_count=30, + ideal_cycle_sec=1.0, + ) + assert m["performance"] == pytest.approx(0.50, abs=0.01) + assert m["oee"] == pytest.approx(0.50, abs=0.01) + + def test_world_class_oee_benchmark(self): + """Walker benchmark: world-class ≥ 85%""" + m = compute_oee( + run_time_sec=55, + planned_time_sec=60, + total_count=54, + good_count=54, + ideal_cycle_sec=1.0, + ) + assert m["oee"] >= 0.85 + + def test_output_rounded_to_4_decimals(self): + m = compute_oee(48, 60, 40, 38, 1.0) + for key in ["availability", "performance", "quality", "oee", "teep"]: + val = m[key] + assert val == round(val, 4), f"{key} not rounded: {val}" + + +# ── Alert logic ─────────────────────────────────────────────────────────────── + +class TestAlertLogic: + def setup_method(self): + _low_oee_ticks.clear() + + def test_counter_increments_below_threshold(self): + _check_alert("Line-1", "lid1", OEE_ALERT_THRESHOLD - 0.01) + assert _low_oee_ticks["lid1"] == 1 + + def test_counter_resets_above_threshold(self): + _low_oee_ticks["lid2"] = 10 + _check_alert("Line-2", "lid2", OEE_ALERT_THRESHOLD + 0.01) + assert _low_oee_ticks["lid2"] == 0 + + def test_alert_fires_at_threshold_tick(self, caplog): + import logging + _low_oee_ticks["lid3"] = OEE_ALERT_TICKS - 1 + with caplog.at_level(logging.WARNING): + _check_alert("Line-3", "lid3", 0.40) + assert "ALERT" in caplog.text + assert _low_oee_ticks["lid3"] == OEE_ALERT_TICKS + + def test_no_alert_before_threshold_tick(self, caplog): + import logging + _low_oee_ticks["lid4"] = OEE_ALERT_TICKS - 2 + with caplog.at_level(logging.WARNING): + _check_alert("Line-4", "lid4", 0.40) + assert "ALERT" not in caplog.text + + +# ── OEE Threshold constants ─────────────────────────────────────────────────── + +class TestConstants: + def test_alert_threshold_is_60_percent(self): + assert OEE_ALERT_THRESHOLD == 0.60 + + def test_alert_ticks_is_30(self): + """30 ticks × 60s = 30 minutes before alert fires.""" + assert OEE_ALERT_TICKS == 30 diff --git a/services/mes/tests/test_work_orders.py b/services/mes/tests/test_work_orders.py new file mode 100644 index 0000000..b4ce219 --- /dev/null +++ b/services/mes/tests/test_work_orders.py @@ -0,0 +1,267 @@ +"""Week 4 tests — Work order CRUD, status transitions, and schedule-aware TEEP. + +All tests use unittest.mock — no live DB or plc-modbus required. +Run: pytest tests/test_work_orders.py -v + +Acceptance Criteria (PRD-MES-CORE.md §10): + AC#6 Work orders create / transition without errors + AC#7 One ACTIVE work order per line enforced +""" + +import uuid +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + +from backend.main import app +from backend.models.db_models import Line, Product, WorkOrder, WorkOrderStatus +from backend.services.oee_calculator import compute_oee + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +LINE_ID = str(uuid.uuid4()) +PRODUCT_ID = str(uuid.uuid4()) +WO_ID = str(uuid.uuid4()) + +NOW = datetime.now(timezone.utc) + + +def _make_line(): + line = MagicMock(spec=Line) + line.id = uuid.UUID(LINE_ID) + line.name = "Conveyor-1" + return line + + +def _make_product(ideal_cycle_sec: float = 2.0): + p = MagicMock(spec=Product) + p.id = uuid.UUID(PRODUCT_ID) + p.sku = "SKU-001" + p.name = "Widget A" + p.ideal_cycle_sec = ideal_cycle_sec + p.description = None + return p + + +def _make_wo(status: WorkOrderStatus = WorkOrderStatus.PENDING): + wo = MagicMock(spec=WorkOrder) + wo.id = uuid.UUID(WO_ID) + wo.order_number = "WO-001" + wo.product_id = uuid.UUID(PRODUCT_ID) + wo.line_id = uuid.UUID(LINE_ID) + wo.target_qty = 100 + wo.good_qty = 0 + wo.status = status + wo.scheduled_start = None + wo.actual_start = None + wo.actual_end = None + wo.notes = None + wo.created_at = NOW + return wo + + +# ── compute_oee: utilisation param (TEEP) ───────────────────────────────────── + +class TestComputeOEEUtilisation: + def test_teep_equals_oee_when_utilisation_1(self): + """Default utilisation=1.0 → TEEP == OEE (Week 3 behaviour preserved).""" + m = compute_oee( + run_time_sec=48, planned_time_sec=60, + total_count=40, good_count=38, ideal_cycle_sec=1.0, + ) + assert m["teep"] == pytest.approx(m["oee"], abs=0.001) + + def test_teep_scaled_by_utilisation(self): + """utilisation=0.5 → TEEP = OEE × 0.5""" + m = compute_oee( + run_time_sec=60, planned_time_sec=60, + total_count=60, good_count=60, ideal_cycle_sec=1.0, + utilisation=0.5, + ) + assert m["oee"] == pytest.approx(1.0, abs=0.001) + assert m["teep"] == pytest.approx(0.5, abs=0.001) + + def test_teep_zero_when_outside_schedule(self): + """utilisation=0.0 → TEEP = 0 (line running outside shift).""" + m = compute_oee( + run_time_sec=60, planned_time_sec=60, + total_count=60, good_count=60, ideal_cycle_sec=1.0, + utilisation=0.0, + ) + assert m["oee"] == pytest.approx(1.0, abs=0.001) + assert m["teep"] == 0.0 + + def test_teep_clamped_to_1(self): + """utilisation > 1.0 is clamped — TEEP never exceeds 1.0.""" + m = compute_oee( + run_time_sec=60, planned_time_sec=60, + total_count=60, good_count=60, ideal_cycle_sec=1.0, + utilisation=2.0, + ) + assert m["teep"] == pytest.approx(1.0, abs=0.001) + + def test_teep_rounded_to_4_decimals(self): + m = compute_oee(48, 60, 40, 38, 1.0, utilisation=0.75) + assert m["teep"] == round(m["teep"], 4) + + +# ── Work order API (via TestClient + mocked DB) ──────────────────────────────── + +@pytest.fixture() +def client(): + """FastAPI test client with DB dependency overridden.""" + from backend.database import get_db + + mock_db = MagicMock() + app.dependency_overrides[get_db] = lambda: mock_db + with TestClient(app, raise_server_exceptions=True) as c: + yield c, mock_db + app.dependency_overrides.clear() + + +class TestCreateWorkOrder: + def test_create_returns_201(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), # line exists + _make_product(), # product exists + None, # order_number unique check + ] + wo = _make_wo() + db.refresh.side_effect = lambda obj: None + + with patch("backend.routes.work_orders.WorkOrder", return_value=wo): + resp = tc.post("/api/mes/work-orders", json={ + "order_number": "WO-001", + "product_id": PRODUCT_ID, + "line_id": LINE_ID, + "target_qty": 100, + }) + + assert resp.status_code == 201 + assert resp.json()["order_number"] == "WO-001" + + def test_create_missing_line_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None # line not found + resp = tc.post("/api/mes/work-orders", json={ + "order_number": "WO-002", + "product_id": PRODUCT_ID, + "line_id": LINE_ID, + "target_qty": 50, + }) + assert resp.status_code == 404 + + def test_duplicate_order_number_returns_409(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.side_effect = [ + _make_line(), # line exists + _make_product(), # product exists + _make_wo(), # duplicate order_number + ] + resp = tc.post("/api/mes/work-orders", json={ + "order_number": "WO-001", + "product_id": PRODUCT_ID, + "line_id": LINE_ID, + "target_qty": 100, + }) + assert resp.status_code == 409 + + +class TestListWorkOrders: + def test_list_returns_200(self, client): + tc, db = client + db.query.return_value.order_by.return_value.all.return_value = [_make_wo()] + # No filters applied — query chain has no .filter() + db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [_make_wo()] + resp = tc.get("/api/mes/work-orders") + assert resp.status_code == 200 + + def test_invalid_status_filter_returns_422(self, client): + tc, db = client + resp = tc.get("/api/mes/work-orders?status=BOGUS") + assert resp.status_code == 422 + + +class TestGetWorkOrder: + def test_found_returns_200(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_wo() + resp = tc.get(f"/api/mes/work-orders/{WO_ID}") + assert resp.status_code == 200 + assert resp.json()["id"] == WO_ID + + def test_not_found_returns_404(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None + resp = tc.get(f"/api/mes/work-orders/{WO_ID}") + assert resp.status_code == 404 + + +class TestStatusTransitions: + def test_pending_to_active(self, client): + tc, db = client + wo = _make_wo(WorkOrderStatus.PENDING) + db.query.return_value.filter.return_value.first.side_effect = [ + wo, # fetch the WO + None, # no conflicting ACTIVE on this line + ] + db.refresh.side_effect = lambda obj: None + resp = tc.patch(f"/api/mes/work-orders/{WO_ID}/status", json={"status": "ACTIVE"}) + assert resp.status_code == 200 + + def test_complete_to_any_is_invalid(self, client): + tc, db = client + wo = _make_wo(WorkOrderStatus.COMPLETE) + db.query.return_value.filter.return_value.first.return_value = wo + resp = tc.patch(f"/api/mes/work-orders/{WO_ID}/status", json={"status": "ACTIVE"}) + assert resp.status_code == 422 + + def test_second_active_on_same_line_returns_409(self, client): + tc, db = client + wo = _make_wo(WorkOrderStatus.PENDING) + conflict = _make_wo(WorkOrderStatus.ACTIVE) + conflict.id = uuid.UUID(str(uuid.uuid4())) + db.query.return_value.filter.return_value.first.side_effect = [ + wo, # fetch the WO + conflict, # conflicting ACTIVE WO + ] + resp = tc.patch(f"/api/mes/work-orders/{WO_ID}/status", json={"status": "ACTIVE"}) + assert resp.status_code == 409 + + def test_unknown_status_value_returns_422(self, client): + tc, db = client + wo = _make_wo(WorkOrderStatus.PENDING) + db.query.return_value.filter.return_value.first.return_value = wo + resp = tc.patch(f"/api/mes/work-orders/{WO_ID}/status", json={"status": "LAUNCHED"}) + assert resp.status_code == 422 + + +# ── Product endpoints ────────────────────────────────────────────────────────── + +class TestProducts: + def test_create_product_returns_201(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = None # no duplicate + p = _make_product() + db.refresh.side_effect = lambda obj: None + + with patch("backend.routes.work_orders.Product", return_value=p): + resp = tc.post("/api/mes/products", json={ + "sku": "SKU-001", + "name": "Widget A", + "ideal_cycle_sec": 2.0, + }) + assert resp.status_code == 201 + + def test_duplicate_sku_returns_409(self, client): + tc, db = client + db.query.return_value.filter.return_value.first.return_value = _make_product() + resp = tc.post("/api/mes/products", json={ + "sku": "SKU-001", + "name": "Widget A", + "ideal_cycle_sec": 2.0, + }) + assert resp.status_code == 409 diff --git a/services/troubleshoot/adapters/telegram_bot.py.bak2 b/services/troubleshoot/adapters/telegram_bot.py.bak2 new file mode 100644 index 0000000..f8217ae --- /dev/null +++ b/services/troubleshoot/adapters/telegram_bot.py.bak2 @@ -0,0 +1,193 @@ +"""Thin Telegram adapter for the Troubleshoot Engine. + +Run: + cd services/troubleshoot + TELEGRAM_TOKEN= OPENAI_API_KEY= python -m adapters.telegram_bot +""" + +from __future__ import annotations + +import logging +import os +import socket +import sys +from pathlib import Path + +from dotenv import load_dotenv + +from telegram import InlineKeyboardButton, InlineKeyboardMarkup, Update +from telegram.ext import ( + Application, + CallbackQueryHandler, + CommandHandler, + ContextTypes, + MessageHandler, + filters, +) + +from engine import EngineResponse, SessionState, TroubleshootEngine +from engine.llm import get_provider + +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + +BASE_DIR = Path(__file__).resolve().parent.parent +WORKFLOWS_DIR = BASE_DIR / "workflows" +PHOTOS_DIR = BASE_DIR / "photos" +WORK_ORDERS_DIR = BASE_DIR / "work_orders" + +PHOTOS_DIR.mkdir(parents=True, exist_ok=True) +WORK_ORDERS_DIR.mkdir(parents=True, exist_ok=True) + +# Engine singleton — initialized in main() +engine: TroubleshootEngine | None = None + + +def _enforce_single_instance(): + """Bind a socket to prevent two bot instances on the same machine.""" + lock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + lock.bind(("127.0.0.1", 47200)) + lock.listen(1) + return lock + except OSError: + logger.error("Another bot instance is running on this machine. Exiting.") + sys.exit(1) + + +async def on_error(update: object, context: ContextTypes.DEFAULT_TYPE) -> None: + """Log unhandled exceptions so they don't fail silently.""" + logger.error("Unhandled error: %s", context.error, exc_info=context.error) + + +def _get_session(context: ContextTypes.DEFAULT_TYPE) -> SessionState | None: + return context.user_data.get("session") + + +def _set_session(context: ContextTypes.DEFAULT_TYPE, session: SessionState) -> None: + context.user_data["session"] = session + + +def _render_response(response: EngineResponse) -> tuple[str, InlineKeyboardMarkup | None]: + """Convert EngineResponse to Telegram message text + keyboard.""" + text = response.message_text + + keyboard = None + if response.buttons: + rows = [ + [InlineKeyboardButton(btn["label"], callback_data=btn["id"])] + for btn in response.buttons + ] + # Add skip button if expecting photo + if response.expecting == "photo" and not any( + btn["id"] == "__skip_photo__" for btn in response.buttons + ): + rows.append( + [InlineKeyboardButton("Skip", callback_data="__skip_photo__")] + ) + keyboard = InlineKeyboardMarkup(rows) + + if response.expecting == "done": + text += "\n\n✅ Session complete. Send /start to begin a new session." + + return text, keyboard + + +async def cmd_start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle /start — begin a new troubleshooting session.""" + user_id = f"tg_{update.effective_user.id}" + session, response = engine.start_session(user_id) + _set_session(context, session) + + text, keyboard = _render_response(response) + await update.message.reply_text(text, reply_markup=keyboard) + + +async def on_callback_query(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle inline button presses.""" + query = update.callback_query + await query.answer() + + session = _get_session(context) + if not session: + await query.edit_message_text("No active session. Send /start to begin.") + return + + response = await engine.handle_button(session, query.data) + text, keyboard = _render_response(response) + await query.edit_message_text(text, reply_markup=keyboard) + + +async def on_text(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle plain text messages.""" + session = _get_session(context) + if not session: + await update.message.reply_text("Send /start to begin a troubleshooting session.") + return + + response = await engine.handle_text(session, update.message.text) + text, keyboard = _render_response(response) + await update.message.reply_text(text, reply_markup=keyboard) + + +async def on_photo(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle photo messages — download and pass to engine.""" + session = _get_session(context) + if not session: + await update.message.reply_text("Send /start to begin a troubleshooting session.") + return + + # Download the largest photo + photo = update.message.photo[-1] + file = await photo.get_file() + n = len(session.photos) + 1 + filename = PHOTOS_DIR / f"{session.session_id}_{n}.jpg" + await file.download_to_drive(str(filename)) + logger.info("Photo saved: %s", filename) + + response = await engine.handle_photo(session, str(filename)) + text, keyboard = _render_response(response) + await update.message.reply_text(text, reply_markup=keyboard) + + +def main() -> None: + _lock = _enforce_single_instance() # noqa: F841 — prevent GC + + load_dotenv(BASE_DIR / ".env") + token = os.environ.get("TELEGRAM_TOKEN") + if not token: + logger.error("TELEGRAM_TOKEN not set") + return + + global engine + llm = get_provider() + engine = TroubleshootEngine( + workflows_dir=WORKFLOWS_DIR, + llm=llm, + work_orders_dir=WORK_ORDERS_DIR, + ) + + app = ( + Application.builder() + .token(token) + .connect_timeout(30) + .read_timeout(30) + .write_timeout(30) + .pool_timeout(10) + .build() + ) + app.add_handler(CommandHandler("start", cmd_start)) + app.add_handler(CallbackQueryHandler(on_callback_query)) + app.add_handler(MessageHandler(filters.PHOTO, on_photo)) + app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, on_text)) + app.add_error_handler(on_error) + + logger.info("Bot starting — %d workflows loaded", len(engine._workflows)) + app.run_polling(drop_pending_updates=True) + + +if __name__ == "__main__": + main() diff --git a/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_1.jpg b/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_1.jpg new file mode 100644 index 0000000..e61358f Binary files /dev/null and b/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_1.jpg differ diff --git a/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_2.jpg b/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_2.jpg new file mode 100644 index 0000000..93d4dc5 Binary files /dev/null and b/services/troubleshoot/photos/13962e01-6526-4e3c-9091-b2b9cd63b822_2.jpg differ diff --git a/services/troubleshoot/photos/e71f61e0-75a2-4f5d-af40-5790cc523f4d_1.jpg b/services/troubleshoot/photos/e71f61e0-75a2-4f5d-af40-5790cc523f4d_1.jpg new file mode 100644 index 0000000..e61358f Binary files /dev/null and b/services/troubleshoot/photos/e71f61e0-75a2-4f5d-af40-5790cc523f4d_1.jpg differ