From db697f527860d209a00a9e66c17de2acbb6eeaa2 Mon Sep 17 00:00:00 2001
From: Bruce
Date: Wed, 6 May 2026 23:40:23 +0800
Subject: [PATCH] feat(engine): replace resilience module with modular recovery
system
Overhaul agent error handling with dedicated components:
- recovery-loop: orchestrates retry logic with backoff
- error-classifier: categorizes errors for recovery strategy selection
- compressor: message compression for context management
- tool-loop-guard: prevents infinite tool invocation cycles
- recovery-metrics: prom-client-based observability
Also includes:
- Auth: add @fastify/cookie support, unit tests for controller/service
- Infra: add .dockerignore, optimize web Dockerfile, add uninstall
script
- Throttle: refined rate-limit config
---
.dockerignore | 61 ++++
README.md | 43 +++
docs/AGENTS.md | 44 +--
docs/CONFIG.md | 14 +-
docs/GET_STARTED.md | 66 ++++-
docs/MULTI-USERS.md | 3 +-
docs/PROVIDERS.md | 33 ++-
docs/SECURITY.md | 3 +-
docs/SKILLS.md | 83 +++---
docs/SPEC.md | 64 ++--
infra/docker/web/Dockerfile | 41 +--
package.json | 1 +
packages/api/package.json | 2 +
.../src/__tests__/auth.integration.test.ts | 14 +-
packages/api/src/agents/agents.controller.ts | 2 +-
packages/api/src/agents/agents.service.ts | 7 +-
.../auth/__tests__/auth.controller.test.ts | 209 +++++++++++++
.../src/auth/__tests__/auth.service.test.ts | 161 ++++++++++
packages/api/src/auth/auth.constants.ts | 12 +
packages/api/src/auth/auth.controller.ts | 84 +++++-
packages/api/src/auth/auth.service.ts | 54 +++-
packages/api/src/bootstrap.ts | 23 +-
.../__tests__/agent-error-message.test.ts | 144 ---------
.../api/src/channels/agent-error-message.ts | 150 ----------
.../src/channels/message-router.service.ts | 26 +-
packages/api/src/common/index.ts | 5 +-
packages/api/src/common/security.config.ts | 2 +
packages/api/src/common/throttle.config.ts | 21 +-
.../__tests__/agent-runner.service.test.ts | 116 +++++++-
.../src/engine/__tests__/compressor.test.ts | 276 ++++++++++++++++++
.../engine/__tests__/error-classifier.test.ts | 162 ++++++++++
.../__tests__/reasoning-loop-timeout.test.ts | 19 +-
.../engine/__tests__/reasoning-loop.test.ts | 45 +--
.../__tests__/recovery-integration.test.ts | 156 ++++++++++
.../engine/__tests__/recovery-loop.test.ts | 222 ++++++++++++++
.../engine/__tests__/recovery-metrics.test.ts | 73 +++++
.../src/engine/__tests__/resilience.test.ts | 186 ------------
.../engine/__tests__/tool-loop-guard.test.ts | 72 +++++
.../api/src/engine/agent-runner.service.ts | 15 +-
packages/api/src/engine/compressor.ts | 239 +++++++++++++++
packages/api/src/engine/engine.module.ts | 2 +
packages/api/src/engine/error-classifier.ts | 262 +++++++++++++++++
packages/api/src/engine/reasoning-loop.ts | 40 ++-
packages/api/src/engine/recovery-loop.ts | 200 +++++++++++++
.../api/src/engine/recovery-loop.types.ts | 92 ++++++
packages/api/src/engine/recovery-metrics.ts | 64 ++++
packages/api/src/engine/resilience.ts | 159 ----------
packages/api/src/engine/tool-loop-guard.ts | 60 ++++
packages/api/src/engine/tool.ts | 6 +
packages/api/src/main.ts | 5 +
.../__tests__/install-parity.test.ts | 30 ++
packages/shared/src/schemas/agent.schema.ts | 6 +
packages/shared/src/schemas/auth.schema.ts | 4 +-
packages/shared/src/schemas/user.schema.ts | 1 +
.../app/(dashboard)/workspace/upload-zone.tsx | 5 +-
packages/web/src/app/login/page.tsx | 38 ++-
packages/web/src/components/auth-provider.tsx | 34 +--
packages/web/src/lib/api.ts | 3 +
packages/web/src/lib/auth.ts | 127 +++++---
pnpm-lock.yaml | 52 +++-
scripts/install.mjs | 159 ++++++++--
scripts/uninstall.mjs | 257 ++++++++++++++++
62 files changed, 3586 insertions(+), 973 deletions(-)
create mode 100644 .dockerignore
create mode 100644 packages/api/src/auth/__tests__/auth.controller.test.ts
create mode 100644 packages/api/src/auth/__tests__/auth.service.test.ts
delete mode 100644 packages/api/src/channels/__tests__/agent-error-message.test.ts
delete mode 100644 packages/api/src/channels/agent-error-message.ts
create mode 100644 packages/api/src/engine/__tests__/compressor.test.ts
create mode 100644 packages/api/src/engine/__tests__/error-classifier.test.ts
create mode 100644 packages/api/src/engine/__tests__/recovery-integration.test.ts
create mode 100644 packages/api/src/engine/__tests__/recovery-loop.test.ts
create mode 100644 packages/api/src/engine/__tests__/recovery-metrics.test.ts
delete mode 100644 packages/api/src/engine/__tests__/resilience.test.ts
create mode 100644 packages/api/src/engine/__tests__/tool-loop-guard.test.ts
create mode 100644 packages/api/src/engine/compressor.ts
create mode 100644 packages/api/src/engine/error-classifier.ts
create mode 100644 packages/api/src/engine/recovery-loop.ts
create mode 100644 packages/api/src/engine/recovery-loop.types.ts
create mode 100644 packages/api/src/engine/recovery-metrics.ts
delete mode 100644 packages/api/src/engine/resilience.ts
create mode 100644 packages/api/src/engine/tool-loop-guard.ts
create mode 100644 packages/shared/src/providers/__tests__/install-parity.test.ts
create mode 100644 scripts/uninstall.mjs
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..1cc62ab
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,61 @@
+# Dependencies — must be installed inside the container, never copied from host
+node_modules
+**/node_modules
+
+# Build output
+.next
+**/.next
+dist
+**/dist
+build
+**/build
+
+# Generated Prisma client (regenerated in container)
+packages/api/src/generated
+
+# TypeScript incremental build info — stale buildinfo causes tsc to skip
+# emitting .d.ts files in the container. Must be recursive (**) since the
+# files live inside each package directory.
+*.tsbuildinfo
+**/*.tsbuildinfo
+
+# Git
+.git
+.gitignore
+
+# Editor / OS
+.vscode
+.idea
+.DS_Store
+**/.DS_Store
+
+# Logs
+*.log
+**/*.log
+npm-debug.log*
+pnpm-debug.log*
+
+# Env files (mounted at runtime, not baked into images)
+.env
+.env.local
+.env.*.local
+**/.env
+**/.env.local
+**/.env.*.local
+
+# Test artifacts
+coverage
+**/coverage
+.nyc_output
+playwright-report
+test-results
+
+# Misc
+.turbo
+.cache
+**/.turbo
+**/.cache
+
+# References / docs (not needed in image)
+references
+docs
diff --git a/README.md b/README.md
index b144e1c..584b9cc 100644
--- a/README.md
+++ b/README.md
@@ -248,6 +248,49 @@ docker compose -f docker-compose.prod.yml up -d --build
docker compose -f docker-compose.prod.yml logs api | grep '\[bootstrap\]'
```
+## Uninstallation
+
+Uninstall Clawix with one of the following commands (add `--full` for complete removal):
+
+```bash
+pnpm run uninstall:clawix # preserve host data
+pnpm run uninstall:clawix -- --full # complete removal
+```
+
+### Flags
+
+| Flag | Description |
+| --------------- | ----------------------------------------------------------------------- |
+| `--full` / `-f` | Remove Docker resources AND host data (.env, ./data/, ./skills/custom/) |
+| `--yes` / `-y` | Skip confirmation prompt |
+
+### What gets removed
+
+**Docker cleanup (default):**
+
+- Containers from both dev and prod environments
+- Images built by compose + `clawix-agent:latest`
+- Named volumes (`postgres_data`, `redis_data`, etc.)
+- Orphan containers
+
+**Host data (with `--full`):**
+
+- `.env` — configuration and secrets
+- `./data/` — runtime data, user workspaces
+- `./skills/custom/` — user-created skills
+
+### Fresh reinstall
+
+```bash
+# Full cleanup
+pnpm run uninstall:clawix -- --full -y
+
+# Reinstall from scratch
+pnpm run install:clawix
+```
+
+> Without `--full`, host data is preserved. The installer detects existing `.env` and skips configuration prompts, reusing your previous settings.
+
---
## Multi-Provider Support
diff --git a/docs/AGENTS.md b/docs/AGENTS.md
index 5d013b3..7c7905e 100644
--- a/docs/AGENTS.md
+++ b/docs/AGENTS.md
@@ -90,13 +90,13 @@ The page is divided into three sections:
The **Public Agents** table columns:
-| Column | Meaning |
-| ----------- | ----------------------------------------------- |
-| **Agent** | Agent name and internal identifier |
-| **Model** | Provider / model name (e.g., `openai / gpt-4o`) |
-| **Role** | `primary` or `worker` badge |
-| **Type** | `Public` (visible to all users) |
-| **Enabled** | Toggle switch; primary agents show "Always on" |
+| Column | Meaning |
+| ----------- | ------------------------------------------------------------------------------------------------------------------- |
+| **Agent** | Agent name and internal identifier |
+| **Model** | Provider / model name (e.g., `anthropic / claude-sonnet-4-6`, `openai / gpt-4o`, `gemini / gemini-3-flash-preview`) |
+| **Role** | `primary` or `worker` badge |
+| **Type** | `Public` (visible to all users) |
+| **Enabled** | Toggle switch; primary agents show "Always on" |
---
@@ -118,9 +118,9 @@ Public agents are available to all users and serve as the shared pool of officia
| **Name** | Yes | 1–255 characters. Used to reference the agent by name in spawn calls. |
| **Description** | No | Up to 2000 characters. Shown in the UI and injected into the primary agent's context so it knows what each worker does. |
| **System Prompt** | Yes | 1–50 000 characters. Defines the agent's persona, instructions, and capabilities. |
- | **Provider** | Yes | Select from configured providers (e.g., `openai`, `anthropic`). |
- | **Model** | Yes | Model identifier (e.g., `gpt-4o`, `claude-sonnet-4-6`). Must match the selected provider. |
- | **API Base URL** | No | Override the provider's default endpoint. Leave blank for standard Anthropic/OpenAI endpoints. |
+ | **Provider** | Yes | Select from configured providers (e.g., `anthropic`, `openai`, `gemini`, `zai-coding`, `kimi-code`). |
+ | **Model** | Yes | Model identifier (e.g., `claude-sonnet-4-6`, `gpt-4o`, `gemini-3-flash-preview`). Must match the selected provider. |
+ | **API Base URL** | No | Override the provider's default endpoint. Leave blank for the provider's built-in default. |
| **Max Tokens per Run** | No | Hard cap on tokens consumed per single run. Default: 100 000. |
4. Click **Create**. The agent appears in the **Public Agents** table.
@@ -335,18 +335,18 @@ curl -X POST http://localhost:3001/api/v1/agents/sub-agents \
## Field Reference Summary
-| Field | Type | Constraints | Default |
-| ----------------- | -------- | ----------------------------------------- | ------------------- |
-| `name` | string | 1–255 chars, required | — |
-| `description` | string | 0–2000 chars, optional | — |
-| `systemPrompt` | string | 1–50 000 chars, required | `""` |
-| `role` | enum | `primary` or `worker` | `primary` |
-| `provider` | string | Must match a configured ProviderConfig ID | `anthropic` |
-| `model` | string | Must be compatible with the provider | `claude-sonnet-4-6` |
-| `apiBaseUrl` | URL | Valid URL or null | `null` |
-| `skillIds` | string[] | Array of Skill CUIDs | `[]` |
-| `maxTokensPerRun` | integer | ≥ 1 | `100000` |
-| `isActive` | boolean | — | `true` |
+| Field | Type | Constraints | Default |
+| ----------------- | -------- | ----------------------------------------- | -------------------------- |
+| `name` | string | 1–255 chars, required | — |
+| `description` | string | 0–2000 chars, optional | — |
+| `systemPrompt` | string | 1–50 000 chars, required | `""` |
+| `role` | enum | `primary` or `worker` | `primary` |
+| `provider` | string | Must match a configured ProviderConfig ID | `anthropic` |
+| `model` | string | Must be compatible with the provider | `claude-sonnet-4-20250514` |
+| `apiBaseUrl` | URL | Valid URL or null | `null` |
+| `skillIds` | string[] | Array of Skill CUIDs | `[]` |
+| `maxTokensPerRun` | integer | ≥ 1 | `100000` |
+| `isActive` | boolean | — | `true` |
---
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index 4c740a9..053a971 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -101,12 +101,12 @@ A **Channel** is a configured integration that allows external messaging platfor
### Supported channel types
-| Type | Use case | Required credentials |
-| ------------ | -------------------------------------------------- | ------------------------- |
-| **Telegram** | Bot receives messages from Telegram users | Bot Token from @BotFather |
-| **Web** | Built-in web chat widget / dashboard conversations | None — always available |
-| **Slack** | Slash commands and DMs via Slack Bolt | App credentials (OAuth) |
-| **WhatsApp** | Production messaging via WhatsApp Business API | Business API credentials |
+| Type | Use case | Required credentials |
+| ------------ | -------------------------------------------------- | ------------------------------------------------- |
+| **Telegram** | Bot receives messages from Telegram users | Bot Token from @BotFather |
+| **Web** | Built-in web chat widget / dashboard conversations | None — always available |
+| **Slack** | Slash commands and DMs via Slack Bolt | App credentials (OAuth) |
+| **WhatsApp** | Production messaging via WhatsApp Business API | WhatsApp Business API via @whiskeysockets/baileys |
### Adding a Channel
@@ -198,6 +198,8 @@ Click **⋯** → **Edit**. API Key field: leave blank to keep the existing key;
| `anthropic` | `claude-opus-4-*`, `claude-sonnet-4-*`, `claude-haiku-4-*` | `ANTHROPIC_API_KEY` |
| `openai` | `gpt-4.1`, `gpt-4o`, `gpt-4o-mini`, `o3`, `codex-*` | `OPENAI_API_KEY` |
| `zai-coding` | `glm-*` | `ZAI_CODING_API_KEY` |
+| `gemini` | `gemini-3-pro-preview`, `gemini-3-flash-preview`, etc. | `GEMINI_API_KEY` |
+| `kimi-code` | (various) | `KIMI_CODE_API_KEY` |
| `custom` | Any (OpenAI-compatible) — Base URL required | — |
→ **Full detail:** [PROVIDERS.md](./PROVIDERS.md)
diff --git a/docs/GET_STARTED.md b/docs/GET_STARTED.md
index 3783745..36c7b85 100644
--- a/docs/GET_STARTED.md
+++ b/docs/GET_STARTED.md
@@ -10,7 +10,7 @@ Clawix lets you securely run AI-powered agents in isolated containers, coordinat
## Key Features
-- **Multi-Provider AI** - Anthropic, OpenAI, Azure, DeepSeek, Gemini, and custom endpoints
+- **Multi-Provider AI** - Anthropic, OpenAI, Z.AI Coding, Gemini, Kimi-code, and custom endpoints
- **Container Isolation** - Every agent runs in a sandboxed Docker container with resource limits
- **Warm Container Pool** - Eliminates cold-start latency for primary agents (1-3s → ~50ms)
- **Swarm Orchestration** - Delegate complex tasks to specialized sub-agents with DAG dependencies
@@ -28,12 +28,13 @@ Clawix lets you securely run AI-powered agents in isolated containers, coordinat
- [Node.js 20+](https://nodejs.org/)
- [pnpm 9+](https://pnpm.io/installation) (`npm install -g pnpm`)
- [Docker](https://docs.docker.com/get-docker/) + Docker Compose
+- [Docker Desktop](https://www.docker.com/products/docker-desktop/) (one way to get both Docker and Docker Compose, with a GUI for managing containers)
### Installation
```bash
# 1. Clone the repository
-git clone https://github.com/clawixAI/clawix.git
+git clone https://github.com/jasonli0226/clawix.git
cd clawix
# 2. Prepare your environment file
@@ -118,6 +119,34 @@ docker compose -f docker-compose.prod.yml up -d --build
docker compose -f docker-compose.prod.yml logs api | grep '\[bootstrap\]'
```
+### Uninstallation
+
+Uninstall Clawix with one of the following commands (add `--full` for complete removal):
+
+```bash
+pnpm run uninstall:clawix # preserve host data
+pnpm run uninstall:clawix -- --full # complete removal
+```
+
+| Flag | Description |
+| --------------- | ----------------------------------------------------------------------- |
+| `--full` / `-f` | Remove Docker resources AND host data (.env, ./data/, ./skills/custom/) |
+| `--yes` / `-y` | Skip confirmation prompt |
+
+**What gets removed:**
+
+- **Docker cleanup (default):** containers, images, and named volumes from both dev and prod environments
+- **Host data (with `--full`):** `.env`, `./data/` (workspaces), `./skills/custom/` (user skills)
+
+**Fresh reinstall:**
+
+```bash
+pnpm run uninstall:clawix -- --full -y
+pnpm run install:clawix
+```
+
+> Without `--full`, host data is preserved. The installer detects existing `.env` and reuses your previous settings.
+
## Architecture
```
@@ -134,7 +163,7 @@ docker compose -f docker-compose.prod.yml logs api | grep '\[bootstrap\]'
┌─────────────────────────────────────────────────────────────────┐
│ Core Engine │
│ Reasoning Loops │ Tool Execution │ Swarm Coordinator │
-│ Multi-Provider (Anthropic, OpenAI, Azure, Custom) │
+│ Multi-Provider (Anthropic, OpenAI, Z.AI Coding, Gemini, Kimi-code, Custom) │
└─────────────────────────────────────────────────────────────────┘
│
┌─────────────────────────────────────────────────────────────────┐
@@ -152,15 +181,17 @@ docker compose -f docker-compose.prod.yml logs api | grep '\[bootstrap\]'
Clawix supports multiple AI providers through a unified interface:
-| Provider | Detection | Notes | Status |
-| ------------ | ------------------------------ | ---------------------- | ----------- |
-| Anthropic | model contains "claude" | Primary provider | done |
-| OpenAI | model contains "gpt" | Fallback | done |
-| Azure OpenAI | config key "azure_openai" | Enterprise deployments | coming soon |
-| DeepSeek | model contains "deepseek" | Cost-effective | coming soon |
-| Gemini | model contains "gemini" | Google AI | coming soon |
-| OpenRouter | API key starts with "sk-or-" | Gateway | coming soon |
-| Custom | any OpenAI-compatible endpoint | vLLM, Ollama, etc. | coming soon |
+| Provider | Detection | Notes | Status |
+| ------------ | ------------------------------ | ---------------------- | ------- |
+| Anthropic | model contains "claude" | Primary provider | done |
+| OpenAI | model contains "gpt" | Fallback | done |
+| Z.AI Coding | model contains "glm" | Z.AI Coding Plan | done |
+| Gemini | model contains "gemini" | Google AI | done |
+| Kimi-code | model contains "kimi" | Moonshot AI | done |
+| Azure OpenAI | config key "azure_openai" | Enterprise deployments | planned |
+| DeepSeek | model contains "deepseek" | Cost-effective | planned |
+| OpenRouter | API key starts with "sk-or-" | Gateway | planned |
+| Custom | any OpenAI-compatible endpoint | vLLM, Ollama, etc. | done |
New providers can be added by defining a `ProviderSpec` entry—no code changes needed.
@@ -216,7 +247,7 @@ pnpm run test # Run all tests (Vitest)
pnpm run test:coverage # Tests with coverage report
pnpm run lint # ESLint + type check
pnpm run format # Prettier format
-pnpm run docker:prod # Start Postgres, Redis, pgAdmin, API, Web
+pnpm run docker:dev # Start Postgres, Redis, pgAdmin
pnpm run docker:down # Stop local infra
pnpm run db:migrate # Run Prisma migrations
pnpm run db:studio # Open Prisma Studio
@@ -236,7 +267,7 @@ pnpm run db:studio # Open Prisma Studio
- **API:** NestJS 11 + Fastify adapter
- **Frontend:** Next.js 15 + Tailwind CSS + shadcn/ui
-- **AI:** Multi-provider (Anthropic, OpenAI, Azure, DeepSeek, Gemini, custom)
+- **AI:** Multi-provider (Anthropic, OpenAI, Z.AI Coding, Gemini, Kimi-code, custom)
- **Database:** Prisma + PostgreSQL 16
- **Cache:** Redis 7 (ioredis)
- **Testing:** Vitest + Playwright
@@ -290,7 +321,7 @@ We welcome contributions! Please see our contributing guidelines:
## Security
-If you discover a security vulnerability, please open a [GitHub Security Advisory](https://github.com/clawixAI/clawix/security/advisories/new) instead of using the public issue tracker.
+If you discover a security vulnerability, please open a [GitHub Security Advisory](https://github.com/jasonli0226/clawix/security/advisories/new) instead of using the public issue tracker.
Security best practices:
@@ -301,8 +332,11 @@ Security best practices:
## Roadmap
-- [ ] WhatsApp Business API integration
+- [x] WhatsApp Business API integration
- [ ] Slack integration
+- [ ] Azure OpenAI provider support
+- [ ] DeepSeek provider support
+- [ ] OpenRouter gateway support
- [ ] Advanced analytics dashboard
- [ ] Skill marketplace UI
- [ ] Multi-region deployment support
diff --git a/docs/MULTI-USERS.md b/docs/MULTI-USERS.md
index dae88a6..93ea778 100644
--- a/docs/MULTI-USERS.md
+++ b/docs/MULTI-USERS.md
@@ -117,7 +117,8 @@ flowchart LR
- **Web** — the authenticated JWT carries `userId`; the WebSocket gateway verifies the JWT on connect.
- **Telegram** — each Clawix user can claim one `telegramId`. Messages from an un-claimed Telegram id are rejected.
-- **WhatsApp / Slack** — **[pending]** (adapters not implemented).
+- **WhatsApp** — implemented via `@whiskeysockets/baileys` (WhatsApp Web multi-device protocol, not the official Business API).
+- **Slack** — **[pending]** (adapter not implemented).
---
diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md
index 628a523..5a8ddfa 100644
--- a/docs/PROVIDERS.md
+++ b/docs/PROVIDERS.md
@@ -40,7 +40,7 @@ Configuring more than one provider gives your organization several concrete adva
## Built-in Provider Types
-Clawix ships with first-class support for three provider types. A `custom` type is also available for OpenAI-compatible endpoints.
+Clawix ships with first-class support for five provider types. A `custom` type is also available for OpenAI-compatible endpoints.
### Anthropic
@@ -53,8 +53,8 @@ Clawix ships with first-class support for three provider types. A `custom` type
### OpenAI
- **Provider ID:** `openai`
-- **Models:** `gpt-4.1`, `gpt-4o`, `gpt-4o-mini`, `o3`, `o3-mini`, `codex-*`, `gpt-5.*`
-- **Capabilities:** Tool calling (note: codex/GPT-5 models use the Responses API automatically)
+- **Models:** `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-4o`, `gpt-4o-mini`, `o3`, `o3-mini`, `o4-mini`, `codex-*`, `gpt-5.*`
+- **Capabilities:** Tool calling. Note: `codex-*` and `gpt-5.*` models use the **Responses API** automatically instead of the Chat Completions API.
- **Default endpoint:** OpenAI SDK default (no Base URL needed)
- **Environment seed variable:** `OPENAI_API_KEY`
@@ -65,12 +65,37 @@ Clawix ships with first-class support for three provider types. A `custom` type
- **Default endpoint:** `https://api.z.ai/api/coding/paas/v4`
- **Environment seed variable:** `ZAI_CODING_API_KEY`
+### Google Gemini
+
+- **Provider ID:** `gemini`
+- **Models:** `gemini-3-pro-preview`, `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-3-flash-lite-preview`, `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.5-flash-lite`
+- **Capabilities:** Tool calling
+- **Default endpoint:** `https://generativelanguage.googleapis.com/v1beta/`
+- **Environment seed variable:** `GEMINI_API_KEY`
+
+### Kimi Coding
+
+- **Provider ID:** `kimi-code`
+- **Models:** (various, via Kimi API)
+- **Default endpoint:** `https://api.kimi.com/coding`
+- **Environment seed variable:** `KIMI_CODE_API_KEY`
+
### Custom (OpenAI-compatible)
- **Provider ID:** any name matching `^[a-z0-9-]+$` that is not one of the above
- **Base URL:** required — point to your self-hosted or third-party OpenAI-compatible endpoint
- **Capabilities:** depends on the target model; tool calling not guaranteed
+### Planned providers
+
+The following providers are on the roadmap but not yet implemented:
+
+| Provider | Detection | Notes |
+| ------------ | ---------------------------- | ---------------------- |
+| Azure OpenAI | config key "azure_openai" | Enterprise deployments |
+| DeepSeek | model contains "deepseek" | Cost-effective |
+| OpenRouter | API key starts with "sk-or-" | Gateway |
+
---
## Adding a Provider
@@ -201,6 +226,8 @@ On first startup (when the `ProviderConfig` table is empty), Clawix automaticall
| `ANTHROPIC_API_KEY` | `anthropic` |
| `OPENAI_API_KEY` | `openai` |
| `ZAI_CODING_API_KEY` | `zai-coding` |
+| `GEMINI_API_KEY` | `gemini` |
+| `KIMI_CODE_API_KEY` | `kimi-code` |
The **first** variable found becomes the default provider. Once records exist in the database, the seed step is skipped on subsequent restarts. All future credential management should be done via the UI or admin API.
diff --git a/docs/SECURITY.md b/docs/SECURITY.md
index 6f945a0..7fec3ab 100644
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@@ -119,7 +119,8 @@ Structured logs (`pino`) record every tool call, LLM invocation, and container l
| DB-level append-only enforcement for `AuditLog` | **[pending]** |
| Key rotation automation | **[pending]** — manual re-encryption via `scripts/encrypt-secret.mjs` today |
| Self-service registration and password reset | **[pending]** |
-| WhatsApp / Slack channel adapters | **[pending]** |
+| WhatsApp channel adapter | Implemented |
+| Slack channel adapter | **[pending]** |
| Responsible-disclosure policy and contact | **[pending]** |
---
diff --git a/docs/SKILLS.md b/docs/SKILLS.md
index 12ad152..bf6b915 100644
--- a/docs/SKILLS.md
+++ b/docs/SKILLS.md
@@ -18,10 +18,10 @@ Navigate to **Skills** in the left sidebar (path: `/skills`) to browse all avail
The page is split into two sections:
-| Section | What it shows |
-| ------------------- | ----------------------------------------------------------------------------------------------------- |
-| **Built-in Skills** | Platform-shipped skills (`skills/builtin/`). Read-only. Available to every user. |
-| **Your Skills** | Custom skills you (or your agent) have created (`skills/custom/{userId}/`). Writable. Private to you. |
+| Section | What it shows |
+| ------------------- | ------------------------------------------------------------------------------------------------- |
+| **Built-in Skills** | Platform-shipped skills (`skills/builtin/`). Read-only. Available to every user. |
+| **Your Skills** | Custom skills you (or your agent) have created (`<workspace>/skills/`). Writable. Private to you. |
Each skill card displays the skill **name**, **description** excerpt, and the **file path** to its `SKILL.md`. The page description reminds you that the `/create-skill` shorthand in a conversation triggers the skill-creator workflow.
@@ -53,29 +53,29 @@ Each skill card displays the skill **name**, **description** excerpt, and the **
```
skills/
-├── builtin/ # Shipped with Clawix — git-tracked, read-only
-│ ├── skill-creator/
-│ │ ├── SKILL.md
-│ │ └── scripts/
-│ │ └── init_skill.py
-│ └── projector-creator/
-│ ├── SKILL.md
-│ └── references/
-│ └── starter-template.html
-└── custom/ # Runtime data — gitignored, per-user read-write
- ├── {userId-1}/
- │ └── my-workflow/
- │ └── SKILL.md
- └── {userId-2}/
- └── ...
+└── builtin/ # Shipped with Clawix — git-tracked, read-only
+ ├── skill-creator/
+ │ ├── SKILL.md
+ │ └── scripts/
+ │ ├── init_skill.py
+ │ ├── quick_validate.py
+ │ └── package_skill.py
+ └── projector-creator/
+ ├── SKILL.md
+ └── references/
+ └── starter-template.html
+
+<workspace>/skills/ # Inside each user's workspace — gitignored, per-user read-write
+└── my-workflow/
+ └── SKILL.md
```
### Two tiers
-| Tier | Path | Access | Purpose |
-| ------------ | ------------------------- | ----------------------- | ----------------------------------------------- |
-| **Built-in** | `skills/builtin/` | Read-only | Platform-shipped skills, updated via `git pull` |
-| **Custom** | `skills/custom/{userId}/` | Read-write (owner only) | User- and agent-created skills |
+| Tier | Path | Access | Purpose |
+| ------------ | --------------------- | ----------------------- | ----------------------------------------------- |
+| **Built-in** | `skills/builtin/` | Read-only | Platform-shipped skills, updated via `git pull` |
+| **Custom** | `<workspace>/skills/` | Read-write (owner only) | User- and agent-created skills |
**Override rule:** If a custom skill directory shares the same name as a built-in skill directory, the custom skill takes precedence for that user. The directory name is the override key.
@@ -156,7 +156,7 @@ The `SkillLoaderService` scans both tiers and builds an XML summary injected int
data-parser
Parse CSV and JSON files into structured summaries. Use when...
- /skills/custom/data-parser/SKILL.md
+ /app/skills/custom/{userId}/data-parser/SKILL.md
custom
@@ -164,7 +164,7 @@ The `SkillLoaderService` scans both tiers and builds an XML summary injected int
**Stage 2 — On-demand loading:**
-When the agent decides a skill is relevant, it uses the `read_file` tool to load the full `SKILL.md` from the path in ``. No special "use_skill" command exists — agents use the same file tools they already have.
+When the agent decides a skill is relevant, it uses the `read_file` tool to load the full `SKILL.md` from the path listed in that skill's summary entry. No special "use_skill" command exists — agents use the same file tools they already have (e.g. `read_file`).
### System prompt ordering
@@ -181,12 +181,12 @@ When the agent decides a skill is relevant, it uses the `read_file` tool to load
When a container starts for a user:
-| Host path | Container path | Mode |
-| ------------------------------- | ------------------ | ---------- |
-| `{SKILLS_BUILTIN_DIR}/` | `/skills/builtin/` | Read-only |
-| `{SKILLS_CUSTOM_DIR}/{userId}/` | `/skills/custom/` | Read-write |
+| Host path | Container path | Mode |
+| ------------------------- | ------------------------------ | ---------- |
+| `{SKILLS_BUILTIN_DIR}/` | `/skills/builtin/` | Read-only |
+| `skills/custom/{userId}/` | `/app/skills/custom/{userId}/` | Read-write |
-The agent sees a flat `/skills/` tree inside the container and has no awareness of the host user directory structure. Built-in skills cannot be modified from inside the container.
+The agent sees built-in skills at `/skills/builtin/` (read-only) and its own custom skills at `/app/skills/custom/{userId}/` (read-write). Built-in skills cannot be modified from inside the container.
---
@@ -214,7 +214,7 @@ Example prompts that trigger skill creation:
#### Step 2 — Agent scaffolds and confirms
-The agent reads `skill-creator`'s instructions, creates the directory structure under `/skills/custom/`, writes the `SKILL.md`, adds any scripts, and confirms what it built.
+The agent reads `skill-creator`'s instructions, creates the directory structure under `/app/skills/custom/{userId}/`, writes the `SKILL.md`, adds any scripts, and confirms what it built.

@@ -228,7 +228,7 @@ The command palette shows all available slash commands with their descriptions p
### Option B — Create files directly (advanced)
-1. Create the directory `skills/custom//` inside the container at `/skills/custom//` (writable).
+1. Create the directory `/app/skills/custom/{userId}/<skill-name>/` inside the container (writable).
2. Write `SKILL.md` with valid frontmatter (see format above).
3. Optionally create `scripts/`, `references/`, `assets/` subdirectories.
@@ -246,7 +246,7 @@ The skill is discoverable on the next agent run. Within the same session, the ag
To validate programmatically:
```bash
-python3 /skills/builtin/skill-creator/scripts/quick_validate.py /skills/custom/
+python3 /skills/builtin/skill-creator/scripts/quick_validate.py /app/skills/custom/{userId}/<skill-name>
```
---
@@ -263,7 +263,7 @@ Simply describe the task. If a skill's `description` matches the situation, the
"Parse this Stripe webhook payload for me: {...}"
```
-The agent matches `stripe-webhook-parser` from the skill summary index, reads `/skills/custom/stripe-webhook-parser/SKILL.md`, and executes the parsing script.
+The agent matches `stripe-webhook-parser` from the skill summary index, reads `/app/skills/custom/{userId}/stripe-webhook-parser/SKILL.md`, and executes the parsing script.
### Explicit invocation
@@ -280,8 +280,8 @@ The agent reads the skill file, calls the script inside the container (`parser.p
### What the agent does step by step
1. Reads the skill summary XML in its system prompt — finds matching skill by description
-2. Calls `read_file("/skills/custom/stripe-webhook-parser/SKILL.md")` to load full instructions
-3. Follows the skill's instructions — may call `exec` to run scripts in the container
+2. Calls `read_file("/app/skills/custom/{userId}/stripe-webhook-parser/SKILL.md")` to load full instructions
+3. Follows the skill's instructions — may call `shell` to run scripts in the container
4. Returns the output in the format the skill specifies
---
@@ -332,11 +332,10 @@ Built-in skills live in `skills/builtin/` (git-tracked). They are updated when C
## Configuration
-| Env var | Default | Description |
-| --------------------- | ------------------------------------- | ------------------------------------------ |
-| `SKILLS_BUILTIN_DIR` | `/skills/builtin` | Absolute path to built-in skills directory |
-| `SKILLS_CUSTOM_DIR` | `/skills/custom` | Absolute path to custom skills root |
-| `MAX_SKILLS_PER_USER` | `50` | Maximum custom skills per user |
+| Env var | Default | Description |
+| --------------------- | ---------------------------- | ------------------------------------------ |
+| `SKILLS_BUILTIN_DIR` | `/skills/builtin` | Absolute path to built-in skills directory |
+| `MAX_SKILLS_PER_USER` | `50` | Maximum custom skills per user |
---
@@ -346,5 +345,5 @@ Built-in skills live in `skills/builtin/` (git-tracked). They are updated when C
- **No symlinks** — symlinked skill directories are rejected during scanning.
- **File size limit** — `SKILL.md` must be under 1 MB.
- **Built-in protection** — `skills/builtin/` is mounted read-only inside containers.
-- **User isolation** — only the owning user's `skills/custom/{userId}/` directory is mounted; no other user's custom directory is visible.
+- **User isolation** — only the owning user's workspace (including `/app/skills/custom/{userId}/`) is mounted; no other user's workspace is visible.
- **Host-side execution** — skill content is read as data by the host-side loader. Scripts execute exclusively inside agent containers, never on the host.
diff --git a/docs/SPEC.md b/docs/SPEC.md
index 1f18bb6..b5ff6af 100644
--- a/docs/SPEC.md
+++ b/docs/SPEC.md
@@ -16,7 +16,7 @@ flowchart TB
subgraph Clients
TG[Telegram Bot]
WEB[Next.js Dashboard / WebSocket]
- WA[WhatsApp pending]
+ WA[WhatsApp]
SL[Slack pending]
end
@@ -52,7 +52,7 @@ flowchart TB
TG --> CH
WEB --> CH
- WA -.-> CH
+ WA --> CH
SL -.-> CH
CH --> ROUTER
ROUTER --> CMD
@@ -102,12 +102,12 @@ Channels are pluggable adapters that translate platform-specific events (Telegra
### 2.2 Supported channels
-| Channel | Status | Notes |
-| --------------- | ------------- | --------------------------------------------------------------------------- |
-| Telegram | Implemented | Polling (default) or webhook; user keyed by `telegramId`. |
-| Web (WebSocket) | Implemented | JWT-authenticated WebSocket on `/ws/chat`; user keyed by `userId`. |
-| WhatsApp | **[pending]** | `ChannelType` enum + Prisma `Channel` row supported; no adapter registered. |
-| Slack | **[pending]** | Same as WhatsApp — enum and config-crypto stubs exist; no adapter. |
+| Channel | Status | Notes |
+| --------------- | ------------- | ------------------------------------------------------------------ |
+| Telegram | Implemented | Polling (default) or webhook; user keyed by `telegramId`. |
+| Web (WebSocket) | Implemented | JWT-authenticated WebSocket on `/ws/chat`; user keyed by `userId`. |
+| WhatsApp | Implemented | WhatsApp Web multi-device protocol via `@whiskeysockets/baileys` (not the official Business API). |
+| Slack | **[pending]** | Enum and config-crypto stubs exist; no adapter yet. |
### 2.3 Channel lifecycle
@@ -146,8 +146,8 @@ clawix_dev/
│ │ └── prisma/ schema.prisma + migrations
│ ├── web/ Next.js 15 dashboard (React 19, Tailwind 4, shadcn/ui)
│ ├── shared/ Cross-package types, Zod schemas, providers, logger
-│ ├── engine/ **[pending]** — reserved; engine code currently lives in api/src/engine
-│ └── worker/ **[pending]** — reserved; background jobs run in-process in api
+│ ├── engine/ Reserved; engine code currently lives in api/src/engine
+│ └── worker/ Reserved; background jobs run in-process in api
├── skills/
│ ├── builtin/ System skills (projector-creator, skill-creator)
│ └── custom/ User-authored skills
@@ -177,6 +177,8 @@ Configuration is env-driven. Values are resolved in this order: **DB-backed conf
| Auth | `JWT_SECRET`, `JWT_EXPIRES_IN`, `JWT_REFRESH_EXPIRES_IN`, `BCRYPT_SALT_ROUNDS` | Access / refresh token config. |
| Crypto | `PROVIDER_ENCRYPTION_KEY` | 32-byte hex; AES-256-GCM for provider & channel secrets. |
| Providers | `DEFAULT_PROVIDER`, `DEFAULT_LLM_MODEL`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY` | Default LLM routing. |
+| Providers | `GEMINI_API_KEY` | Google Gemini API key. |
+| Providers | `KIMI_CODE_API_KEY` | Kimi Coding Plan API key. |
| Containers | `AGENT_CONTAINER_IMAGE`, `AGENT_MAX_RETRIES`, `AGENT_TIMEOUT_SECONDS` | Container pool defaults. |
| Workspace | `WORKSPACE_BASE_PATH`, `WORKSPACE_HOST_BASE_PATH` (a.k.a. `CLAWIX_HOST_DATA_DIR`) | In-container vs host paths for bind mounts. |
| Skills | `SKILLS_BUILTIN_DIR`, `SKILLS_CUSTOM_DIR`, `SKILLS_*_HOST_DIR`, `MAX_SKILLS_PER_USER` | Skill loader roots (container + host). |
@@ -379,7 +381,19 @@ flowchart LR
---
-## 10. MCP Servers — **[pending]**
+## 10. Prompt Caching
+
+Clawix uses Anthropic prompt caching with frozen-snapshot system prompts (Stage 2). The system prompt is cached once and reused across turns within a session, reducing token costs. It is implemented via the `@anthropic-ai/sdk` built-in caching support with `cacheControl: { type: 'ephemeral' }` breakpoints.
+
+---
+
+## 11. Streaming Multi-Message Delivery
+
+Streaming delivery is controlled by a per-agent flag. When enabled, intermediate agent messages (tool calls, progress updates) are streamed to the client in real time via WebSocket and channel adapters. Implemented in #63.
+
+---
+
+## 12. MCP Servers — **[pending]**
MCP (Model Context Protocol) is **not yet integrated**. The Anthropic SDK shipped with the repo (`@anthropic-ai/sdk`) includes MCP helpers, but no Clawix code imports them and there is no MCP server registry, config table, or tool adapter.
@@ -412,23 +426,23 @@ Open design questions: per-user vs per-org server configs, credential storage (r
---
-## 11. Deployment
+## 13. Deployment
-### 11.1 Development (`docker-compose.dev.yml`)
+### 13.1 Development (`docker-compose.dev.yml`)
- `postgres` — `postgres:16-alpine` on host port `5433`.
- `redis` — `redis:7-alpine`, appendonly + LRU, 256 MB cap.
- `api-server` — `node:22-slim` with source bind-mounts for hot-reload; runs `pnpm` via corepack. Mounts `/var/run/docker.sock` so the API can spawn agent containers. Bind-mounts `./data`, `./skills`, `./infra/templates`.
- `web` — Next.js dev server (see compose file).
-### 11.2 Production (`docker-compose.prod.yml`)
+### 13.2 Production (`docker-compose.prod.yml`)
- Multi-stage images: `clawix-api:latest` (Dockerfile in `infra/docker/api/`), `clawix-web:latest`, `clawix-agent:latest`.
- API stage 2: `node:22-slim` + `docker.io` + `openssl`; Prisma CLI global; `entrypoint.sh` runs migrations; health check `GET /health` with 60 s start period.
- Redis cap raised to 512 MB; persistent volumes for Postgres and Redis.
- Required secrets: `POSTGRES_USER`, `POSTGRES_PASSWORD`, `JWT_SECRET`, `PROVIDER_ENCRYPTION_KEY`, `CLAWIX_HOST_DATA_DIR`, `CLAWIX_HOST_SKILLS_DIR`.
-### 11.3 Topology
+### 13.3 Topology
```mermaid
flowchart TB
@@ -449,51 +463,51 @@ flowchart TB
---
-## 12. Security Considerations
+## 14. Security Considerations
-### 12.1 Authentication & Authorization
+### 14.1 Authentication & Authorization
- JWT access + refresh tokens; refresh tokens tracked in Redis with TTL.
- Bcrypt password hashing (12 rounds).
- `JwtAuthGuard` + `RolesGuard` + `@Roles()` decorator for RBAC (Admin / User / Guest).
- Policy-throttled endpoints via `policy-throttler.guard.ts` (Redis-backed).
-### 12.2 Container isolation
+### 14.2 Container isolation
- `ContainerPoolService` warms containers per primary agent; `ContainerRunner` wraps `docker` CLI.
- Hardening defaults: non-root UID 1000, `--network none`, PID limit 256, `no-new-privileges`, optional read-only rootfs + tmpfs, CPU 0.5 / mem 512 MB, 10 s graceful stop.
- Spawn tool launches worker sub-agents in fresh containers.
-### 12.3 Mount security (`engine/mount-security.ts`)
+### 14.3 Mount security (`engine/mount-security.ts`)
- **Host-level allowlist** — JSON of allowed roots + blocked patterns.
- **Per-agent allowlist** — additional DB-backed restriction.
- **Default-blocked** — `.ssh`, `.aws`, `.docker`, `.kube`, `*.pem`, `*.key`, `/etc/passwd`, `/proc`, `/sys`, credentials files.
- Symlink resolution + glob match before every mount.
-### 12.4 Secret handling
+### 14.4 Secret handling
- `common/crypto.ts` — AES-256-GCM utilities keyed off `PROVIDER_ENCRYPTION_KEY`.
- `channels/channel-config-crypto.ts` — selective encryption of channel secrets; masked in admin API responses.
- `ProviderConfigRepository` — encrypted API keys with 60 s in-memory cache.
- `scripts/encrypt-secret.mjs` — operator helper.
-### 12.5 HTTP hardening
+### 14.5 HTTP hardening
- Helmet: strict CSP in prod (`default-src 'none'`), HSTS with 1 y + preload, COEP on in prod.
- CORS: explicit allowlist (no wildcards with credentials).
- All API inputs validated with Zod schemas (`packages/shared/src/schemas`).
-### 12.6 Observability & audit
+### 14.6 Observability & audit
- `common/audit-log.interceptor.ts` writes `AuditLog` rows for sensitive actions; rows are append-only.
- `pino` + `pino-http` structured logs; `prom-client` metrics. Grafana / Loki dashboards **[pending]**.
- `TokenCounterService` logs per-call token usage; budgets enforced via user policy.
-### 12.7 Known gaps / pending
+### 14.7 Known gaps / pending
-- WhatsApp and Slack adapters.
-- MCP server integration (§10).
+- Slack adapter.
+- MCP server integration (§12).
- Kubernetes manifests under `infra/k8s/`.
- `packages/engine` and `packages/worker` are reserved but empty; engine code currently lives inside `packages/api/src/engine`.
- Grafana / Loki dashboards and alerting wiring.
diff --git a/infra/docker/web/Dockerfile b/infra/docker/web/Dockerfile
index 3835eb6..8892665 100644
--- a/infra/docker/web/Dockerfile
+++ b/infra/docker/web/Dockerfile
@@ -2,26 +2,7 @@
# Build: docker build -t clawix-web:latest -f infra/docker/web/Dockerfile .
# ─────────────────────────────────────────────────────────────────────────────
-# Stage 1: Install dependencies
-# ─────────────────────────────────────────────────────────────────────────────
-FROM node:22-slim AS deps
-
-RUN corepack enable && corepack prepare pnpm@latest --activate
-
-WORKDIR /app
-
-# Copy workspace config and lockfile
-COPY package.json pnpm-lock.yaml pnpm-workspace.yaml tsconfig.base.json ./
-
-# Copy package.json files for each workspace package
-COPY packages/shared/package.json packages/shared/
-COPY packages/web/package.json packages/web/
-
-# Install all dependencies (including devDependencies for build)
-RUN pnpm install --frozen-lockfile
-
-# ─────────────────────────────────────────────────────────────────────────────
-# Stage 2: Build
+# Stage 1: Build
# ─────────────────────────────────────────────────────────────────────────────
FROM node:22-slim AS builder
@@ -35,13 +16,19 @@ ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
ARG NEXT_PUBLIC_WS_URL
ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
-# Copy dependencies from deps stage
-COPY --from=deps /app/node_modules ./node_modules
-COPY --from=deps /app/packages/shared/node_modules ./packages/shared/node_modules
-COPY --from=deps /app/packages/web/node_modules ./packages/web/node_modules
-
-# Copy source code
+# Copy workspace config and package manifests first so the install layer is
+# cached when only source code changes.
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml tsconfig.base.json ./
+COPY packages/shared/package.json packages/shared/
+COPY packages/web/package.json packages/web/
+
+# Install all dependencies (including devDependencies for build).
+# pnpm creates a virtual store at ./node_modules/.pnpm/ with symlinks from each
+# package's node_modules. Installing inside the container ensures the layout
+# matches the Linux platform exactly.
+RUN pnpm install --frozen-lockfile
+
+# Copy source code after install — keeps the install layer cacheable.
COPY packages/shared ./packages/shared
COPY packages/web ./packages/web
@@ -50,7 +37,7 @@ RUN pnpm --filter @clawix/shared build && \
pnpm --filter @clawix/web build
# ─────────────────────────────────────────────────────────────────────────────
-# Stage 3: Production image
+# Stage 2: Production image
# ─────────────────────────────────────────────────────────────────────────────
FROM node:22-slim AS runner
diff --git a/package.json b/package.json
index b479da1..1addd94 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
"docker:prod:logs": "docker compose -f docker-compose.prod.yml logs -f",
"install:clawix": "node scripts/install.mjs",
"update:clawix": "node scripts/update.mjs",
+ "uninstall:clawix": "node scripts/uninstall.mjs",
"db:migrate": "pnpm --filter @clawix/api exec prisma migrate dev && pnpm --filter @clawix/api exec prisma generate",
"db:seed": "pnpm --filter @clawix/api exec prisma db seed",
"db:reset": "pnpm --filter @clawix/api exec prisma migrate reset --force",
diff --git a/packages/api/package.json b/packages/api/package.json
index b003542..453278c 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -22,6 +22,7 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.82.0",
"@clawix/shared": "workspace:*",
+ "@fastify/cookie": "^11.0.2",
"@fastify/cors": "^11.2.0",
"@fastify/helmet": "^13.0.2",
"@fastify/multipart": "^10.0.0",
@@ -53,6 +54,7 @@
"pg": "^8.20.0",
"pino": "^9.14.0",
"pino-http": "^11.0.0",
+ "prom-client": "^15.1.3",
"qrcode-terminal": "^0.12.0",
"reflect-metadata": "^0.2.0",
"rxjs": "^7.8.0",
diff --git a/packages/api/src/__tests__/auth.integration.test.ts b/packages/api/src/__tests__/auth.integration.test.ts
index 0527f46..df607c9 100644
--- a/packages/api/src/__tests__/auth.integration.test.ts
+++ b/packages/api/src/__tests__/auth.integration.test.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { describe, it, expect, beforeAll, beforeEach, afterAll } from 'vitest';
import { Test } from '@nestjs/testing';
import { ConfigModule } from '@nestjs/config';
import { FastifyAdapter, type NestFastifyApplication } from '@nestjs/platform-fastify';
@@ -6,6 +6,7 @@ import { APP_FILTER, APP_GUARD } from '@nestjs/core';
import { PassportModule } from '@nestjs/passport';
import { JwtModule } from '@nestjs/jwt';
import { hash } from 'bcryptjs';
+import cookie from '@fastify/cookie';
import { AuthController } from '../auth/auth.controller.js';
import { AuthService } from '../auth/auth.service.js';
import { JwtStrategy } from '../auth/jwt.strategy.js';
@@ -85,10 +86,21 @@ describe('Auth Integration', () => {
}).compile();
app = moduleRef.createNestApplication(new FastifyAdapter());
+ // Register @fastify/cookie so reply.setCookie / req.cookies work in
+ // the auth controller. Production registers this in security.config.ts;
+ // the integration test must do it explicitly since it skips main
+ // bootstrap.
+ await app.getHttpAdapter().getInstance().register(cookie);
await app.init();
await app.getHttpAdapter().getInstance().ready();
});
+ beforeEach(() => {
+ // Reset the in-memory Redis store between tests so leftover login_fail
+ // counters or refresh tokens from one test don't affect the next.
+ redisStore.clear();
+ });
+
afterAll(async () => {
await app?.close();
});
diff --git a/packages/api/src/agents/agents.controller.ts b/packages/api/src/agents/agents.controller.ts
index eb3eb28..84d244d 100644
--- a/packages/api/src/agents/agents.controller.ts
+++ b/packages/api/src/agents/agents.controller.ts
@@ -129,7 +129,7 @@ export class AgentsController {
@Req() req: AuthRequest,
) {
const { user } = req;
- return this.agentsService.createAgent(body, user.sub);
+ return this.agentsService.createAgent(body, user.sub, user.role);
}
@Patch(':id')
diff --git a/packages/api/src/agents/agents.service.ts b/packages/api/src/agents/agents.service.ts
index e53f420..43b0bdc 100644
--- a/packages/api/src/agents/agents.service.ts
+++ b/packages/api/src/agents/agents.service.ts
@@ -51,9 +51,12 @@ export class AgentsService {
async createAgent(
input: CreateAgentDefinitionInput,
createdById?: string,
+ userRole?: string,
): Promise {
- // User-created agents are always custom (not official)
- return this.agentDefRepo.create({ ...input, createdById, isOfficial: false });
+ // Only admins may create Public (official) agents; force false otherwise
+ // so non-admins can't escalate by setting the flag in the request body.
+ const isOfficial = userRole === 'admin' ? (input.isOfficial ?? false) : false;
+ return this.agentDefRepo.create({ ...input, createdById, isOfficial });
}
async updateAgent(
diff --git a/packages/api/src/auth/__tests__/auth.controller.test.ts b/packages/api/src/auth/__tests__/auth.controller.test.ts
new file mode 100644
index 0000000..180045a
--- /dev/null
+++ b/packages/api/src/auth/__tests__/auth.controller.test.ts
@@ -0,0 +1,209 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import type { FastifyReply, FastifyRequest } from 'fastify';
+import { AuthController } from '../auth.controller.js';
+import { AuthService } from '../auth.service.js';
+import {
+ REFRESH_COOKIE_NAME,
+ REFRESH_COOKIE_PATH,
+ REFRESH_COOKIE_MAX_AGE,
+} from '../auth.constants.js';
+
+interface CookieCall { // one recorded setCookie() invocation on the fake reply
+  name: string;
+  value: string;
+  opts: Record<string, unknown>; // cookie options; the tests only inspect them via toMatchObject
+}
+
+interface ClearCall { // one recorded clearCookie() invocation on the fake reply
+  name: string;
+  opts: Record<string, unknown>;
+}
+
+interface FakeReply { // recording stand-in for the cookie methods of FastifyReply
+  setCookieCalls: CookieCall[];
+  clearCookieCalls: ClearCall[];
+  setCookie: (name: string, value: string, opts: Record<string, unknown>) => FakeReply;
+  clearCookie: (name: string, opts: Record<string, unknown>) => FakeReply;
+}
+
+function makeReply(): FakeReply {
+  // Each method logs its arguments on the receiver and returns it, so calls can be chained.
+  return {
+    clearCookieCalls: [],
+    setCookieCalls: [],
+    clearCookie(name, opts) {
+      this.clearCookieCalls.push({ name, opts });
+      return this;
+    },
+    setCookie(name, value, opts) {
+      this.setCookieCalls.push({ name, value, opts });
+      return this;
+    },
+  };
+}
+
+function makeRequest( // minimal FastifyRequest stub exposing only `cookies` and `protocol`
+  cookies: Record<string, string> = {},
+  protocol: 'http' | 'https' = 'https',
+): FastifyRequest {
+  return { cookies, protocol } as unknown as FastifyRequest;
+}
+
+describe('AuthController — cookie handling', () => {
+ let authService: { // vi.fn() stubs for the AuthService methods the controller calls
+   login: ReturnType<typeof vi.fn>;
+   refresh: ReturnType<typeof vi.fn>;
+   logout: ReturnType<typeof vi.fn>;
+ };
+ let controller: AuthController;
+
+ beforeEach(() => {
+ authService = {
+ login: vi.fn().mockResolvedValue({ accessToken: 'access-abc', refreshToken: 'refresh-xyz' }),
+ refresh: vi
+ .fn()
+ .mockResolvedValue({ accessToken: 'access-new', refreshToken: 'refresh-new' }),
+ logout: vi.fn().mockResolvedValue(undefined),
+ };
+ controller = new AuthController(authService as unknown as AuthService);
+ });
+
+ describe('POST /auth/login', () => {
+ it('returns accessToken in body and sets refresh cookie with httpOnly + sameSite=strict', async () => {
+ const reply = makeReply();
+ const req = makeRequest({}, 'https');
+ const result = await controller.login(
+ { email: 'admin@clawix.test', password: 'password1234' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+
+ // Backward-compat: body still includes refreshToken so existing
+ // localStorage-based clients keep working until web migration lands.
+ expect(result).toEqual({ accessToken: 'access-abc', refreshToken: 'refresh-xyz' });
+
+ // Cookie set with proper flags
+ expect(reply.setCookieCalls).toHaveLength(1);
+ const call = reply.setCookieCalls[0]!;
+ expect(call.name).toBe(REFRESH_COOKIE_NAME);
+ expect(call.value).toBe('refresh-xyz');
+ expect(call.opts).toMatchObject({
+ httpOnly: true,
+ sameSite: 'strict',
+ path: REFRESH_COOKIE_PATH,
+ maxAge: REFRESH_COOKIE_MAX_AGE,
+ });
+ });
+
+ it('sets secure=true on the refresh cookie when the request scheme is https', async () => {
+ const reply = makeReply();
+ const req = makeRequest({}, 'https');
+ await controller.login(
+ { email: 'admin@clawix.test', password: 'password1234' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+ expect(reply.setCookieCalls[0]!.opts).toMatchObject({ secure: true });
+ });
+
+ it('sets secure=false on the refresh cookie when the request scheme is http', async () => {
+ const reply = makeReply();
+ const req = makeRequest({}, 'http');
+ await controller.login(
+ { email: 'admin@clawix.test', password: 'password1234' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+ expect(reply.setCookieCalls[0]!.opts).toMatchObject({ secure: false });
+ });
+ });
+
+ describe('POST /auth/refresh', () => {
+ it('reads refreshToken from cookie when present (cookie wins over body)', async () => {
+ const reply = makeReply();
+ const req = makeRequest({ [REFRESH_COOKIE_NAME]: 'cookie-token' });
+
+ const result = await controller.refresh(
+ { refreshToken: 'body-token-ignored' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+
+ expect(authService.refresh).toHaveBeenCalledWith('cookie-token');
+ expect(result).toEqual({ accessToken: 'access-new', refreshToken: 'refresh-new' });
+
+ // New refresh cookie is rotated
+ expect(reply.setCookieCalls).toHaveLength(1);
+ expect(reply.setCookieCalls[0]).toMatchObject({
+ name: REFRESH_COOKIE_NAME,
+ value: 'refresh-new',
+ });
+ });
+
+ it('falls back to body when no cookie present (backward compat for curl/scripts)', async () => {
+ const reply = makeReply();
+ const req = makeRequest({});
+
+ await controller.refresh(
+ { refreshToken: 'body-token' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+
+ expect(authService.refresh).toHaveBeenCalledWith('body-token');
+ });
+
+ it('rotates a secure cookie when the refresh request arrives over https', async () => {
+ const reply = makeReply();
+ const req = makeRequest({ [REFRESH_COOKIE_NAME]: 'cookie-token' }, 'https');
+
+ await controller.refresh({ refreshToken: '' }, req, reply as unknown as FastifyReply);
+
+ expect(reply.setCookieCalls[0]!.opts).toMatchObject({ secure: true });
+ });
+
+ it('rotates a non-secure cookie when the refresh request arrives over http', async () => {
+ const reply = makeReply();
+ const req = makeRequest({ [REFRESH_COOKIE_NAME]: 'cookie-token' }, 'http');
+
+ await controller.refresh({ refreshToken: '' }, req, reply as unknown as FastifyReply);
+
+ expect(reply.setCookieCalls[0]!.opts).toMatchObject({ secure: false });
+ });
+ });
+
+ describe('POST /auth/logout', () => {
+ it('reads cookie when present, clears it on response', async () => {
+ const reply = makeReply();
+ const req = makeRequest({ [REFRESH_COOKIE_NAME]: 'cookie-token' });
+
+ await controller.logout(
+ { refreshToken: 'body-token-ignored' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+
+ expect(authService.logout).toHaveBeenCalledWith('cookie-token');
+ expect(reply.clearCookieCalls).toHaveLength(1);
+ expect(reply.clearCookieCalls[0]).toMatchObject({
+ name: REFRESH_COOKIE_NAME,
+ opts: { path: REFRESH_COOKIE_PATH },
+ });
+ });
+
+ it('falls back to body refreshToken when no cookie (backward compat)', async () => {
+ const reply = makeReply();
+ const req = makeRequest({});
+
+ await controller.logout(
+ { refreshToken: 'body-token' },
+ req,
+ reply as unknown as FastifyReply,
+ );
+
+ expect(authService.logout).toHaveBeenCalledWith('body-token');
+ // Cookie still cleared (idempotent — sends Set-Cookie with past expiry)
+ expect(reply.clearCookieCalls).toHaveLength(1);
+ });
+ });
+});
diff --git a/packages/api/src/auth/__tests__/auth.service.test.ts b/packages/api/src/auth/__tests__/auth.service.test.ts
new file mode 100644
index 0000000..8fb83fa
--- /dev/null
+++ b/packages/api/src/auth/__tests__/auth.service.test.ts
@@ -0,0 +1,161 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { ConfigService } from '@nestjs/config';
+import { JwtService } from '@nestjs/jwt';
+import { hash } from 'bcryptjs';
+import { AuthService } from '../auth.service.js';
+import { LOGIN_FAIL_PREFIX, LOGIN_FAIL_TTL_SECONDS, MAX_DELAY_SECONDS } from '../auth.constants.js';
+
+interface FailRecord {
+ count: number; // number of failed login attempts recorded so far
+ lastAttempt: number; // epoch milliseconds (the tests seed it from Date.now())
+}
+
+interface FakeRedis { // in-memory stand-in for the Redis dependency handed to AuthService
+  store: Map<string, unknown>;
+  get<T>(key: string): Promise<T | null>;
+  set(key: string, value: unknown, opts?: { ttlSeconds?: number }): Promise<void>;
+  del(key: string): Promise<boolean>;
+  lastSetTtl?: number; // ttlSeconds passed to the most recent set() call
+}
+
+function makeRedis(): FakeRedis { // Map-backed fake; TTLs are recorded but never enforced
+  const store = new Map<string, unknown>();
+  return {
+    store,
+    async get<T>(key: string) {
+      return (store.get(key) as T | undefined) ?? null;
+    },
+    async set(key, value, opts) {
+      store.set(key, value);
+      this.lastSetTtl = opts?.ttlSeconds;
+    },
+    async del(key) {
+      return store.delete(key);
+    },
+  };
+}
+
+const TEST_EMAIL = 'delay-test@example.com';
+const VALID_EMAIL = 'valid@example.com';
+const VALID_PASSWORD = 'correct-password';
+const WRONG_PASSWORD = 'wrong-password';
+
+async function buildService(redis: FakeRedis, validUserHash?: string): Promise<AuthService> {
+  const prisma = { // stub: only VALID_EMAIL or id 'user-1' resolves to a user; other lookups return null
+    user: {
+      findUnique: vi.fn(async ({ where }: { where: { email?: string; id?: string } }) => {
+        if (where.email === VALID_EMAIL || where.id === 'user-1') {
+          return {
+            id: 'user-1',
+            email: VALID_EMAIL,
+            passwordHash: validUserHash,
+            role: 'admin',
+            isActive: true,
+            policy: { name: 'Standard' },
+          };
+        }
+        return null;
+      }),
+    },
+  };
+  const jwt = { sign: vi.fn(() => 'fake-jwt-token') };
+  const config = {
+    getOrThrow: vi.fn(() => 'test-secret'),
+    get: vi.fn(() => '12'),
+  };
+
+  return new AuthService(
+    prisma as never,
+    jwt as unknown as JwtService,
+    redis as never,
+    config as unknown as ConfigService,
+  );
+}
+
+describe('AuthService — progressive login delay', () => {
+ let redis: FakeRedis;
+ let service: AuthService;
+
+ beforeEach(async () => {
+ redis = makeRedis();
+ service = await buildService(redis);
+ });
+
+ it('allows the first login attempt without delay (no Redis entry yet)', async () => {
+ await expect(service.login(TEST_EMAIL, WRONG_PASSWORD)).rejects.toThrow('Invalid credentials');
+ });
+
+ it('records a failed attempt in Redis with count=1 after first failure', async () => {
+   await service.login(TEST_EMAIL, WRONG_PASSWORD).catch(() => {});
+   // Read the record back as a FailRecord so its fields can be asserted on.
+   const failData = (await redis.get<FailRecord>(`${LOGIN_FAIL_PREFIX}${TEST_EMAIL}`)) ?? null;
+   expect(failData).not.toBeNull();
+   expect(failData?.count).toBe(1);
+   expect(failData?.lastAttempt).toBeTypeOf('number');
+ });
+
+ it('persists the fail record with the configured TTL', async () => {
+ await service.login(TEST_EMAIL, WRONG_PASSWORD).catch(() => {});
+ expect(redis.lastSetTtl).toBe(LOGIN_FAIL_TTL_SECONDS);
+ });
+
+ it('throws TooManyRequests when retried immediately after a failure', async () => {
+ await service.login(TEST_EMAIL, WRONG_PASSWORD).catch(() => {});
+
+ await expect(service.login(TEST_EMAIL, WRONG_PASSWORD)).rejects.toThrow(/Try again in/);
+ });
+
+ it('increments fail count on subsequent failures (after the delay window)', async () => {
+   // Seed an existing fail with lastAttempt in the past so the next attempt is allowed.
+   await redis.set(
+     `${LOGIN_FAIL_PREFIX}${TEST_EMAIL}`,
+     { count: 1, lastAttempt: Date.now() - 5000 },
+     { ttlSeconds: LOGIN_FAIL_TTL_SECONDS },
+   );
+
+   await service.login(TEST_EMAIL, WRONG_PASSWORD).catch(() => {});
+
+   const failData = await redis.get<FailRecord>(`${LOGIN_FAIL_PREFIX}${TEST_EMAIL}`);
+   expect(failData?.count).toBe(2);
+ });
+
+ it('caps the required delay at MAX_DELAY_SECONDS even with very high counts', async () => {
+ // count=10 → 2^10 = 1024s, must be capped to MAX_DELAY_SECONDS (30s)
+ await redis.set(
+ `${LOGIN_FAIL_PREFIX}${TEST_EMAIL}`,
+ { count: 10, lastAttempt: Date.now() - (MAX_DELAY_SECONDS - 5) * 1000 },
+ { ttlSeconds: LOGIN_FAIL_TTL_SECONDS },
+ );
+
+ // Still inside the 30s window → blocked
+ await expect(service.login(TEST_EMAIL, WRONG_PASSWORD)).rejects.toThrow(/Try again in/);
+
+ // Move just past the 30s cap
+ await redis.set(
+ `${LOGIN_FAIL_PREFIX}${TEST_EMAIL}`,
+ { count: 10, lastAttempt: Date.now() - (MAX_DELAY_SECONDS + 1) * 1000 },
+ { ttlSeconds: LOGIN_FAIL_TTL_SECONDS },
+ );
+
+ // Now allowed (will fail with Invalid credentials, not TooManyRequests)
+ await expect(service.login(TEST_EMAIL, WRONG_PASSWORD)).rejects.toThrow('Invalid credentials');
+ });
+
+ it('clears the fail record on a successful login', async () => {
+ const validHash = await hash(VALID_PASSWORD, 4);
+ service = await buildService(redis, validHash);
+
+ await redis.set(
+ `${LOGIN_FAIL_PREFIX}${VALID_EMAIL}`,
+ { count: 3, lastAttempt: Date.now() - 60_000 },
+ { ttlSeconds: LOGIN_FAIL_TTL_SECONDS },
+ );
+
+ const tokens = await service.login(VALID_EMAIL, VALID_PASSWORD);
+ expect(tokens.accessToken).toBeDefined();
+ expect(tokens.refreshToken).toBeDefined();
+
+ const failData = await redis.get(`${LOGIN_FAIL_PREFIX}${VALID_EMAIL}`);
+ expect(failData).toBeNull();
+ });
+});
diff --git a/packages/api/src/auth/auth.constants.ts b/packages/api/src/auth/auth.constants.ts
index bfb8262..469e974 100644
--- a/packages/api/src/auth/auth.constants.ts
+++ b/packages/api/src/auth/auth.constants.ts
@@ -3,3 +3,15 @@ export const JWT_REFRESH_EXPIRY = '7d';
export const REFRESH_TOKEN_PREFIX = 'refresh_token:';
export const REFRESH_TOKEN_TTL_SECONDS = 7 * 24 * 60 * 60; // 7 days
export const BCRYPT_SALT_ROUNDS_DEFAULT = 12;
+
+// Progressive login delay (per-email)
+export const LOGIN_FAIL_PREFIX = 'login_fail:'; // Redis key prefix; the email is appended
+export const LOGIN_FAIL_TTL_SECONDS = 3600; // 1 hour
+export const MAX_DELAY_SECONDS = 30; // upper bound on the delay required between attempts
+
+// Refresh token cookie
+export const REFRESH_COOKIE_NAME = 'clawix_refresh';
+// AuthController is mounted at `/auth` (no /api/v1 prefix); cookie path must
+// match so the browser sends it on /auth/refresh and /auth/logout.
+export const REFRESH_COOKIE_PATH = '/auth';
+export const REFRESH_COOKIE_MAX_AGE = 7 * 24 * 60 * 60; // 7 days in seconds
diff --git a/packages/api/src/auth/auth.controller.ts b/packages/api/src/auth/auth.controller.ts
index 18cff46..8910fcd 100644
--- a/packages/api/src/auth/auth.controller.ts
+++ b/packages/api/src/auth/auth.controller.ts
@@ -1,15 +1,48 @@
-import { Body, Controller, HttpCode, HttpStatus, Post } from '@nestjs/common';
+import {
+ Body,
+ Controller,
+ HttpCode,
+ HttpStatus,
+ Post,
+ Req,
+ Res,
+ UnauthorizedException,
+} from '@nestjs/common';
import { ApiTags } from '@nestjs/swagger';
import { Throttle } from '@nestjs/throttler';
+import type { FastifyReply, FastifyRequest } from 'fastify';
import { loginSchema, refreshSchema, type LoginInput, type RefreshInput } from '@clawix/shared';
import {
- AUTH_THROTTLE_BLOCK_MS,
- AUTH_THROTTLE_LIMIT,
AUTH_THROTTLE_TTL_MS,
+ LOGIN_THROTTLE_BLOCK_MS,
+ LOGIN_THROTTLE_LIMIT,
+ REFRESH_THROTTLE_LIMIT,
} from '../common/throttle.config.js';
import { ZodValidationPipe } from '../common/zod-validation.pipe.js';
import { AuthService } from './auth.service.js';
import { Public } from './public.decorator.js';
+import {
+ REFRESH_COOKIE_MAX_AGE,
+ REFRESH_COOKIE_NAME,
+ REFRESH_COOKIE_PATH,
+} from './auth.constants.js';
+
+/**
+ * Stores the refresh token in an httpOnly, SameSite=Strict cookie limited to
+ * REFRESH_COOKIE_PATH and expiring after REFRESH_COOKIE_MAX_AGE.
+ */
+function setRefreshCookie(req: FastifyRequest, reply: FastifyReply, refreshToken: string): void {
+  // Browsers silently discard Secure cookies received over http://, so the
+  // flag mirrors the request scheme. That lets one image serve plain-HTTP
+  // tailnet/LAN setups as well as deployments behind a TLS-terminating proxy
+  // (X-Forwarded-Proto is trusted via Fastify trustProxy).
+  const overTls = req.protocol === 'https';
+  reply.setCookie(REFRESH_COOKIE_NAME, refreshToken, {
+    path: REFRESH_COOKIE_PATH,
+    maxAge: REFRESH_COOKIE_MAX_AGE,
+    httpOnly: true,
+    sameSite: 'strict',
+    secure: overTls,
+  });
+}
+
+/** Clears the refresh cookie, passing REFRESH_COOKIE_PATH as the cookie path. */
+function clearRefreshCookie(reply: FastifyReply): void {
+ reply.clearCookie(REFRESH_COOKIE_NAME, { path: REFRESH_COOKIE_PATH });
+}
@ApiTags('auth')
@Controller('auth')
@@ -19,35 +52,60 @@ export class AuthController {
@Public()
@Throttle({
default: {
- limit: AUTH_THROTTLE_LIMIT,
+ limit: LOGIN_THROTTLE_LIMIT,
ttl: AUTH_THROTTLE_TTL_MS,
- blockDuration: AUTH_THROTTLE_BLOCK_MS,
+ blockDuration: LOGIN_THROTTLE_BLOCK_MS,
},
})
@Post('login')
@HttpCode(HttpStatus.OK)
- login(@Body(new ZodValidationPipe(loginSchema)) body: LoginInput) {
- return this.authService.login(body.email, body.password);
+ async login(
+ @Body(new ZodValidationPipe(loginSchema)) body: LoginInput,
+ @Req() req: FastifyRequest,
+ @Res({ passthrough: true }) reply: FastifyReply,
+ ) {
+ const tokens = await this.authService.login(body.email, body.password);
+ setRefreshCookie(req, reply, tokens.refreshToken);
+ // Body still includes refreshToken for backward compat with localStorage
+ // clients and scripts. Web migration in Tasks 7-9 will stop reading it.
+ return tokens;
}
@Public()
@Post('logout')
@HttpCode(HttpStatus.NO_CONTENT)
- async logout(@Body(new ZodValidationPipe(refreshSchema)) body: RefreshInput) {
- await this.authService.logout(body.refreshToken);
+ async logout(
+ @Body(new ZodValidationPipe(refreshSchema)) body: RefreshInput, // NOTE(review): validated before the cookie fallback — confirm refreshSchema allows a missing refreshToken
+ @Req() req: FastifyRequest,
+ @Res({ passthrough: true }) reply: FastifyReply,
+ ) {
+ const refreshToken = req.cookies?.[REFRESH_COOKIE_NAME] ?? body.refreshToken; // cookie wins; body is the fallback
+ if (refreshToken) {
+ await this.authService.logout(refreshToken);
+ }
+ clearRefreshCookie(reply); // runs even when no token was supplied
}
@Public()
@Throttle({
default: {
- limit: AUTH_THROTTLE_LIMIT,
+ limit: REFRESH_THROTTLE_LIMIT, // blockDuration is dropped: refresh is rate-limited but has no punitive block
ttl: AUTH_THROTTLE_TTL_MS,
- blockDuration: AUTH_THROTTLE_BLOCK_MS,
},
})
@Post('refresh')
@HttpCode(HttpStatus.OK)
- refresh(@Body(new ZodValidationPipe(refreshSchema)) body: RefreshInput) {
- return this.authService.refresh(body.refreshToken);
+ async refresh(
+ @Body(new ZodValidationPipe(refreshSchema)) body: RefreshInput, // NOTE(review): validated before the cookie fallback — confirm refreshSchema allows a missing refreshToken
+ @Req() req: FastifyRequest,
+ @Res({ passthrough: true }) reply: FastifyReply,
+ ) {
+ const refreshToken = req.cookies?.[REFRESH_COOKIE_NAME] ?? body.refreshToken; // cookie wins; body is the fallback
+ if (!refreshToken) {
+ throw new UnauthorizedException('No refresh token'); // neither cookie nor body carried a token
+ }
+ const tokens = await this.authService.refresh(refreshToken);
+ setRefreshCookie(req, reply, tokens.refreshToken); // re-issue the cookie with the token the service returned
+ return tokens;
}
}
diff --git a/packages/api/src/auth/auth.service.ts b/packages/api/src/auth/auth.service.ts
index 9345036..9cdb387 100644
--- a/packages/api/src/auth/auth.service.ts
+++ b/packages/api/src/auth/auth.service.ts
@@ -1,4 +1,10 @@
-import { Inject, Injectable, UnauthorizedException } from '@nestjs/common';
+import {
+ HttpException,
+ HttpStatus,
+ Inject,
+ Injectable,
+ UnauthorizedException,
+} from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { JwtService } from '@nestjs/jwt';
import { compare } from 'bcryptjs';
@@ -8,11 +14,25 @@ import { PrismaService } from '../prisma/prisma.service.js';
import {
BCRYPT_SALT_ROUNDS_DEFAULT,
JWT_ACCESS_EXPIRY,
+ LOGIN_FAIL_PREFIX,
+ LOGIN_FAIL_TTL_SECONDS,
+ MAX_DELAY_SECONDS,
REFRESH_TOKEN_PREFIX,
REFRESH_TOKEN_TTL_SECONDS,
} from './auth.constants.js';
import type { JwtPayload, TokenPair } from './auth.types.js';
+interface LoginFailRecord { // per-email failure state stored under LOGIN_FAIL_PREFIX
+ count: number; // failures recorded since the key was last cleared or expired
+ lastAttempt: number; // Date.now() (epoch ms) of the most recent failure
+}
+
+class TooManyRequestsException extends HttpException { // raised by checkLoginDelay while the back-off is still running
+ constructor(message: string) {
+ super(message, HttpStatus.TOO_MANY_REQUESTS); // HTTP 429
+ }
+}
+
@Injectable()
export class AuthService {
private readonly jwtSecret: string;
@@ -31,20 +51,26 @@ export class AuthService {
}
async login(email: string, password: string): Promise {
+ await this.checkLoginDelay(email);
+
const user = await this.prisma.user.findUnique({
where: { email },
include: { policy: { select: { name: true } } },
});
if (!user || !user.isActive) {
+ await this.recordFailedAttempt(email);
throw new UnauthorizedException('Invalid credentials');
}
const passwordValid = await compare(password, user.passwordHash);
if (!passwordValid) {
+ await this.recordFailedAttempt(email);
throw new UnauthorizedException('Invalid credentials');
}
+ await this.clearFailedAttempts(email);
+
return this.generateTokenPair({
sub: user.id,
email: user.email,
@@ -53,6 +79,32 @@ export class AuthService {
});
}
+ private async checkLoginDelay(email: string): Promise { // rejects the login while the per-email back-off has not elapsed
+ const failData = await this.redis.get(`${LOGIN_FAIL_PREFIX}${email}`);
+ if (!failData) return; // no recorded failures, so no delay applies
+
+ const requiredDelayMs = Math.min(2 ** failData.count, MAX_DELAY_SECONDS) * 1000; // 2^count seconds, capped at MAX_DELAY_SECONDS
+ const elapsedMs = Date.now() - failData.lastAttempt;
+ if (elapsedMs < requiredDelayMs) {
+ const remaining = Math.ceil((requiredDelayMs - elapsedMs) / 1000); // whole seconds left, rounded up
+ throw new TooManyRequestsException(`Too many attempts. Try again in ${remaining}s`);
+ }
+ }
+
+ private async recordFailedAttempt(email: string): Promise { // bumps the failure counter for this email and stamps the time
+ const key = `${LOGIN_FAIL_PREFIX}${email}`;
+ const existing = await this.redis.get(key); // NOTE(review): read-then-write is not atomic; concurrent failures may under-count
+ await this.redis.set(
+ key,
+ { count: (existing?.count ?? 0) + 1, lastAttempt: Date.now() }, // first failure starts at 1
+ { ttlSeconds: LOGIN_FAIL_TTL_SECONDS }, // TTL is passed on every write
+ );
+ }
+
+ private async clearFailedAttempts(email: string): Promise { // called by login() after the password check succeeds
+ await this.redis.del(`${LOGIN_FAIL_PREFIX}${email}`); // drops the counter, which resets the back-off
+ }
+
async refresh(refreshToken: string): Promise {
const userId = await this.redis.get(`${REFRESH_TOKEN_PREFIX}${refreshToken}`);
diff --git a/packages/api/src/bootstrap.ts b/packages/api/src/bootstrap.ts
index db0c824..48f45d8 100644
--- a/packages/api/src/bootstrap.ts
+++ b/packages/api/src/bootstrap.ts
@@ -12,6 +12,7 @@
* Invocation (inside the prod image): `node dist/bootstrap.js`
*/
import { PrismaPg } from '@prisma/adapter-pg';
+import { listProviders } from '@clawix/shared';
import { PrismaClient } from './generated/prisma/client.js';
import bcrypt from 'bcryptjs';
import { encrypt } from './common/crypto.js';
@@ -48,16 +49,18 @@ interface ProviderSeed {
}
function buildProviderSeeds(): ProviderSeed[] {
- const seeds: ProviderSeed[] = [
- { provider: 'anthropic', displayName: 'Anthropic', envKey: 'ANTHROPIC_API_KEY' },
- { provider: 'openai', displayName: 'OpenAI', envKey: 'OPENAI_API_KEY' },
- {
- provider: 'zai-coding',
- displayName: 'Z.AI Coding Plan',
- envKey: 'ZAI_CODING_API_KEY',
- baseUrl: 'https://api.z.ai/api/coding/paas/v4',
- },
- ];
+ // Derive from the registry so bootstrap, the installer, and the runtime
+ // SDK never drift. The 'custom' entry is excluded — it's a placeholder
+ // spec; real custom providers come from CUSTOM_PROVIDER_* env vars below.
+ const seeds: ProviderSeed[] = listProviders()
+ .filter((p) => p.name !== 'custom')
+ .map((p) => ({
+ provider: p.name,
+ displayName: p.displayName,
+ envKey: p.envKey,
+ ...(p.defaultBaseUrl ? { baseUrl: p.defaultBaseUrl } : {}),
+ }));
+
const customName = process.env['CUSTOM_PROVIDER_NAME'];
const customBase = process.env['CUSTOM_PROVIDER_BASE_URL'];
if (customName && customBase) {
diff --git a/packages/api/src/channels/__tests__/agent-error-message.test.ts b/packages/api/src/channels/__tests__/agent-error-message.test.ts
deleted file mode 100644
index d6417f0..0000000
--- a/packages/api/src/channels/__tests__/agent-error-message.test.ts
+++ /dev/null
@@ -1,144 +0,0 @@
-import { describe, it, expect } from 'vitest';
-
-import { classifyAgentError } from '../agent-error-message.js';
-
-describe('classifyAgentError', () => {
- describe('network category', () => {
- it('classifies undici connect-timeout errors', () => {
- const err = new Error(
- 'Gemini network error: fetch failed (UND_ERR_CONNECT_TIMEOUT): Connect Timeout Error',
- );
- const result = classifyAgentError(err);
- expect(result.category).toBe('network');
- expect(result.text).toMatch(/can't reach the AI provider/i);
- });
-
- it('classifies bare "fetch failed" via cause.code', () => {
- const cause = Object.assign(new Error('connect ECONNREFUSED'), { code: 'ECONNREFUSED' });
- const err = Object.assign(new TypeError('fetch failed'), { cause });
- const result = classifyAgentError(err);
- expect(result.category).toBe('network');
- });
-
- it('classifies ENOTFOUND DNS failures', () => {
- const err = new Error('getaddrinfo ENOTFOUND api.example.com');
- const result = classifyAgentError(err);
- expect(result.category).toBe('network');
- });
-
- it('classifies generic ETIMEDOUT', () => {
- const err = new Error('Connection ETIMEDOUT');
- const result = classifyAgentError(err);
- expect(result.category).toBe('network');
- });
- });
-
- describe('auth category', () => {
- it('classifies "Gemini auth failed"', () => {
- const err = new Error('Gemini auth failed: API_KEY_INVALID');
- const result = classifyAgentError(err);
- expect(result.category).toBe('auth');
- expect(result.text).toMatch(/admin/i);
- });
-
- it('classifies generic 401 messages', () => {
- const err = new Error('Request failed with status 401 Unauthorized');
- const result = classifyAgentError(err);
- expect(result.category).toBe('auth');
- });
- });
-
- describe('rate_limit category', () => {
- it('classifies Gemini rate limit', () => {
- const err = new Error('Gemini rate limit: Quota exceeded');
- const result = classifyAgentError(err);
- expect(result.category).toBe('rate_limit');
- expect(result.text).toMatch(/rate limit|wait/i);
- });
-
- it('classifies generic 429 messages', () => {
- const err = new Error('429 Too Many Requests');
- const result = classifyAgentError(err);
- expect(result.category).toBe('rate_limit');
- });
- });
-
- describe('bad_request category', () => {
- it('classifies provider request rejections', () => {
- const err = new Error('Gemini request rejected: Invalid argument');
- const result = classifyAgentError(err);
- expect(result.category).toBe('bad_request');
- expect(result.text).toMatch(/couldn't process|rejected/i);
- });
- });
-
- describe('policy category', () => {
- it('classifies provider-not-allowed errors thrown by agent-runner', () => {
- const err = new Error("Provider 'openai' is not allowed by policy 'standard'");
- const result = classifyAgentError(err);
- expect(result.category).toBe('policy');
- expect(result.text).toMatch(/policy|administrator|plan/i);
- });
-
- it('classifies token-budget-exceeded errors', () => {
- const err = new Error("Token budget exceeded for user 'u1': $1.0000 used of $0.5000 budget");
- const result = classifyAgentError(err);
- expect(result.category).toBe('policy');
- expect(result.text).toMatch(/budget|limit|administrator/i);
- });
-
- it('classifies inactive agent definition errors', () => {
- const err = new Error("Agent definition 'abc' is inactive");
- const result = classifyAgentError(err);
- expect(result.category).toBe('policy');
- });
- });
-
- describe('content_filter category', () => {
- it('classifies Moonshot/Kimi safety rejection as content_filter', () => {
- const err = new Error(
- '400 System detected potentially unsafe or sensitive content in input or generation.',
- );
- const result = classifyAgentError(err);
- expect(result.category).toBe('content_filter');
- expect(result.text).toMatch(/flagged|unsafe|rephrase/i);
- });
-
- it('classifies OpenAI content-policy rejection as content_filter', () => {
- const err = new Error('Your request was rejected as a result of our content policy.');
- const result = classifyAgentError(err);
- expect(result.category).toBe('content_filter');
- });
-
- it('classifies Anthropic safety-system rejection as content_filter', () => {
- const err = new Error('Output blocked by safety system');
- const result = classifyAgentError(err);
- expect(result.category).toBe('content_filter');
- });
- });
-
- describe('unknown category', () => {
- it('falls back for unrecognized errors', () => {
- const err = new Error('something completely unexpected');
- const result = classifyAgentError(err);
- expect(result.category).toBe('unknown');
- expect(result.text).toMatch(/something went wrong/i);
- });
-
- it('handles non-Error throws gracefully', () => {
- const result = classifyAgentError('boom');
- expect(result.category).toBe('unknown');
- });
-
- it('handles undefined gracefully', () => {
- const result = classifyAgentError(undefined);
- expect(result.category).toBe('unknown');
- });
- });
-
- it('never includes stack traces or internal paths in the user text', () => {
- const err = new Error('boom\n at foo (/app/dist/internal.js:123:45)');
- const result = classifyAgentError(err);
- expect(result.text).not.toMatch(/\/app\/|at foo|\.js:/);
- });
-});
diff --git a/packages/api/src/channels/agent-error-message.ts b/packages/api/src/channels/agent-error-message.ts
deleted file mode 100644
index 612bc79..0000000
--- a/packages/api/src/channels/agent-error-message.ts
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Maps agent-execution errors to user-facing categories + safe display text.
- *
- * Provider-agnostic: matches on patterns from any provider's normalized error
- * message (anthropic, openai, gemini, …) plus undici fetch-cause `code`s.
- *
- * Returned `text` never contains stack traces, file paths, or raw provider
- * internals — safe to send directly to end users on any channel.
- */
-
-export type AgentErrorCategory =
- | 'network'
- | 'auth'
- | 'rate_limit'
- | 'bad_request'
- | 'content_filter'
- | 'policy'
- | 'unknown';
-
-export interface ClassifiedAgentError {
- readonly category: AgentErrorCategory;
- readonly text: string;
-}
-
-const NETWORK_CODES = new Set([
- 'ECONNREFUSED',
- 'ECONNRESET',
- 'ETIMEDOUT',
- 'ENOTFOUND',
- 'EAI_AGAIN',
- 'EPIPE',
- 'EHOSTUNREACH',
- 'ENETUNREACH',
- 'UND_ERR_CONNECT_TIMEOUT',
- 'UND_ERR_HEADERS_TIMEOUT',
- 'UND_ERR_BODY_TIMEOUT',
- 'UND_ERR_SOCKET',
-]);
-
-const NETWORK_PATTERNS = [
- 'fetch failed',
- 'network error',
- 'connect timeout',
- 'socket hang up',
- 'etimedout',
- 'econnreset',
- 'econnrefused',
- 'enotfound',
- 'und_err_',
-];
-
-const AUTH_PATTERNS = [
- 'auth failed',
- 'unauthorized',
- 'api_key_invalid',
- 'invalid api key',
- 'status 401',
- '401 ',
-];
-
-const RATE_LIMIT_PATTERNS = [
- 'rate limit',
- 'rate_limit',
- 'quota exceeded',
- 'status 429',
- '429 ',
- 'too many requests',
-];
-
-const BAD_REQUEST_PATTERNS = [
- 'request rejected',
- 'invalid argument',
- 'status 400',
- '400 bad request',
-];
-
-const POLICY_PATTERNS = ['is not allowed by policy', 'token budget exceeded', 'is inactive'];
-
-const CONTENT_FILTER_PATTERNS = [
- 'unsafe or sensitive content',
- 'potentially unsafe',
- 'safety system',
- 'content policy',
- 'content_policy',
- 'content_filter',
- 'flagged as inappropriate',
- 'violates our usage policy',
- 'violates our content policy',
-];
-
-const MESSAGES: Record = {
- network: "I can't reach the AI provider right now. Please try again in a moment.",
- auth: 'The AI provider rejected our credentials. An admin needs to check the API key.',
- rate_limit: "We've hit a rate limit. Please wait a minute and try again.",
- bad_request: "I couldn't process that — the provider rejected the request shape.",
- content_filter:
- 'Your message was flagged as potentially unsafe by the AI provider. Try rephrasing your request.',
- policy:
- "This request isn't allowed by your account's plan or has exceeded its budget. Please contact your administrator.",
- unknown: 'Something went wrong while processing your message. Please try again.',
-};
-
-function extractCode(err: unknown): string | undefined {
- if (!(err instanceof Error)) return undefined;
- const direct = (err as { code?: unknown }).code;
- if (typeof direct === 'string') return direct;
- const cause = (err as { cause?: unknown }).cause;
- if (cause instanceof Error) {
- const causeCode = (cause as { code?: unknown }).code;
- if (typeof causeCode === 'string') return causeCode;
- }
- return undefined;
-}
-
-function matchesAny(haystack: string, needles: readonly string[]): boolean {
- return needles.some((n) => haystack.includes(n));
-}
-
-/**
- * Classify any thrown value into a category and a user-safe display string.
- *
- * Order matters: auth/rate_limit/bad_request are checked before the broader
- * network match, since some provider messages contain "connection" wording
- * even when the real cause was a 401/429/400.
- */
-export function classifyAgentError(err: unknown): ClassifiedAgentError {
- const message = err instanceof Error ? err.message : '';
- const lower = message.toLowerCase();
- const code = extractCode(err);
-
- if (matchesAny(lower, POLICY_PATTERNS)) {
- return { category: 'policy', text: MESSAGES.policy };
- }
- if (matchesAny(lower, CONTENT_FILTER_PATTERNS)) {
- return { category: 'content_filter', text: MESSAGES.content_filter };
- }
- if (matchesAny(lower, AUTH_PATTERNS)) {
- return { category: 'auth', text: MESSAGES.auth };
- }
- if (matchesAny(lower, RATE_LIMIT_PATTERNS)) {
- return { category: 'rate_limit', text: MESSAGES.rate_limit };
- }
- if (matchesAny(lower, BAD_REQUEST_PATTERNS)) {
- return { category: 'bad_request', text: MESSAGES.bad_request };
- }
- if ((code && NETWORK_CODES.has(code)) || matchesAny(lower, NETWORK_PATTERNS)) {
- return { category: 'network', text: MESSAGES.network };
- }
- return { category: 'unknown', text: MESSAGES.unknown };
-}
diff --git a/packages/api/src/channels/message-router.service.ts b/packages/api/src/channels/message-router.service.ts
index ddd2da6..bc17108 100644
--- a/packages/api/src/channels/message-router.service.ts
+++ b/packages/api/src/channels/message-router.service.ts
@@ -18,15 +18,25 @@ import { CommandService } from '../commands/command.service.js';
import { resolveToolProgressMode, formatToolBubble, type BubbleState } from '@clawix/shared';
-import { classifyAgentError } from './agent-error-message.js';
+import { classifyError } from '../engine/error-classifier.js';
+import type { ErrorCategory } from '../engine/recovery-loop.types.js';
+import { agentErrorTotal } from '../engine/recovery-metrics.js';
-const ERROR_CODE_BY_CATEGORY: Record = {
+const ERROR_CODE_BY_CATEGORY: Record = { // error-code string per classifier category (keys presumably ErrorCategory — confirm)
network: 'NETWORK_ERROR',
- auth: 'AUTH_ERROR',
+ timeout: 'TIMEOUT',
+ overloaded: 'OVERLOADED',
+ server_error: 'SERVER_ERROR',
rate_limit: 'RATE_LIMITED',
+ auth: 'AUTH_ERROR', // unchanged value; the entry only moved
+ billing: 'BILLING_ERROR',
+ model_not_found: 'MODEL_UNAVAILABLE',
+ provider_policy: 'CONTENT_FILTERED', // same code the removed content_filter category mapped to
+ context_overflow: 'CONTEXT_OVERFLOW',
+ payload_too_large: 'PAYLOAD_TOO_LARGE',
bad_request: 'BAD_REQUEST',
- content_filter: 'CONTENT_FILTERED',
policy: 'POLICY_DENIED',
+ loop_aborted: 'LOOP_ABORTED',
unknown: 'AGENT_ERROR',
};
@@ -119,6 +129,7 @@ export class MessageRouterService {
// 6. Run agent — session creation is delegated to agent-runner so that
// pre-execution validation failures (provider blocked, budget exceeded,
// inactive agent) don't leave orphan empty sessions in the database.
+ let agentProviderName: string | undefined;
try {
// Resolve agent + channel settings for streaming. Reads happen inside
// try/catch so NotFoundError (e.g. dangling agent FK) flows to the
@@ -127,6 +138,7 @@ export class MessageRouterService {
this.agentDefRepo.findById(userAgent.agentDefinitionId),
this.channelRepo.findById(channel.id).catch(() => null),
]);
+ agentProviderName = agentDef.provider;
const toolProgressMode = resolveToolProgressMode(
channel.type,
channelRow?.toolProgressMode ?? null,
@@ -196,7 +208,11 @@ export class MessageRouterService {
cause instanceof Error
? { message: cause.message, code: (cause as { code?: string }).code }
: undefined;
- const classified = classifyAgentError(error);
+ const classified = classifyError(error);
+ agentErrorTotal.inc({
+ category: classified.category,
+ provider: agentProviderName ?? 'unknown',
+ });
logger.error(
{ userId: user.id, err: error, cause: causeInfo, category: classified.category },
'Agent execution failed',
diff --git a/packages/api/src/common/index.ts b/packages/api/src/common/index.ts
index 4f2fe8f..6d3d234 100644
--- a/packages/api/src/common/index.ts
+++ b/packages/api/src/common/index.ts
@@ -9,7 +9,8 @@ export { PolicyThrottlerGuard } from './policy-throttler.guard.js';
export {
resolvePolicyLimit,
resolvePolicyTtl,
- AUTH_THROTTLE_LIMIT,
AUTH_THROTTLE_TTL_MS,
- AUTH_THROTTLE_BLOCK_MS,
+ LOGIN_THROTTLE_LIMIT,
+ LOGIN_THROTTLE_BLOCK_MS,
+ REFRESH_THROTTLE_LIMIT,
} from './throttle.config.js';
diff --git a/packages/api/src/common/security.config.ts b/packages/api/src/common/security.config.ts
index a6fe744..3fc24e7 100644
--- a/packages/api/src/common/security.config.ts
+++ b/packages/api/src/common/security.config.ts
@@ -1,6 +1,7 @@
import type { NestFastifyApplication } from '@nestjs/platform-fastify';
import helmet, { type FastifyHelmetOptions } from '@fastify/helmet';
import cors from '@fastify/cors';
+import cookie from '@fastify/cookie';
import type { FastifyInstance } from 'fastify';
/**
@@ -86,4 +87,5 @@ export async function registerSecurityPlugins(app: NestFastifyApplication): Prom
const fastify = app.getHttpAdapter().getInstance() as unknown as FastifyInstance;
await fastify.register(helmet, buildHelmetOptions());
await fastify.register(cors, buildCorsOptions());
+ await fastify.register(cookie);
}
diff --git a/packages/api/src/common/throttle.config.ts b/packages/api/src/common/throttle.config.ts
index f43c4bb..6902d8f 100644
--- a/packages/api/src/common/throttle.config.ts
+++ b/packages/api/src/common/throttle.config.ts
@@ -14,10 +14,25 @@ const POLICY_LIMITS: Readonly> = {
const DEFAULT_LIMIT = 30;
const DEFAULT_TTL_MS = 60_000;
-/** Auth endpoints: stricter limit to prevent brute-force. */
-export const AUTH_THROTTLE_LIMIT = 5;
+/** Shared window for auth endpoints. */
export const AUTH_THROTTLE_TTL_MS = 60_000;
-export const AUTH_THROTTLE_BLOCK_MS = 300_000; // 5-minute block
+
+/**
+ * Login: strict limit + punitive block to deter password brute-force.
+ * Every attempt is a password guess, so 5 attempts per minute followed by a
+ * 5-minute lockout fits the threat model.
+ */
+export const LOGIN_THROTTLE_LIMIT = 5; // attempts per AUTH_THROTTLE_TTL_MS window
+export const LOGIN_THROTTLE_BLOCK_MS = 300_000; // 5-minute block
+
+/**
+ * Refresh: relaxed limit, no block. The endpoint already requires a valid
+ * refresh token, so it is not a brute-force surface. The dashboard may fire
+ * several refreshes in quick succession (parallel API calls on page load,
+ * multiple open tabs), and a 5-min lockout after a transient burst makes
+ * the app unusable for legitimate users.
+ */
+export const REFRESH_THROTTLE_LIMIT = 30; // requests per AUTH_THROTTLE_TTL_MS window
/**
* Resolvable limit function: reads the user's policyName from the JWT payload
diff --git a/packages/api/src/engine/__tests__/agent-runner.service.test.ts b/packages/api/src/engine/__tests__/agent-runner.service.test.ts
index 7f580fc..5b2bdcc 100644
--- a/packages/api/src/engine/__tests__/agent-runner.service.test.ts
+++ b/packages/api/src/engine/__tests__/agent-runner.service.test.ts
@@ -45,10 +45,6 @@ vi.mock('../tools/web/index.js', () => ({
registerWebTools: vi.fn(),
}));
-vi.mock('../resilience.js', () => ({
- ResilientLLMProvider: vi.fn().mockImplementation((inner: unknown) => inner),
-}));
-
vi.mock('../context-builder.service.js', () => ({
ContextBuilderService: vi.fn(),
}));
@@ -426,6 +422,7 @@ describe('AgentRunnerService', () => {
findByTaskRunId: vi.fn().mockResolvedValue([]),
} as unknown as import('../../db/task-run-message.repository.js').TaskRunMessageRepository,
mocks.mockSystemSettings as unknown as import('../../system-settings/system-settings.service.js').SystemSettingsService,
+ { compress: vi.fn() } as unknown as import('../compressor.js').CompressorService,
);
});
@@ -1052,6 +1049,7 @@ describe('AgentRunnerService — with messageStore', () => {
findByTaskRunId: vi.fn().mockResolvedValue([]),
} as unknown as import('../../db/task-run-message.repository.js').TaskRunMessageRepository,
mocks.mockSystemSettings as unknown as import('../../system-settings/system-settings.service.js').SystemSettingsService,
+ { compress: vi.fn() } as unknown as import('../compressor.js').CompressorService,
);
});
@@ -1081,3 +1079,113 @@ describe('AgentRunnerService — with messageStore', () => {
expect(result.sessionId).toBeNull();
});
});
+
+// ------------------------------------------------------------------ //
+// AgentRunnerService — recovery integration //
+// ------------------------------------------------------------------ //
+
+describe('AgentRunnerService — recovery integration', () => {
+ let service: AgentRunnerService;
+ let mocks: ReturnType;
+ let mockLoopInstance: { run: ReturnType };
+
+ beforeEach(() => { // fresh mocks and a fresh AgentRunnerService for every test
+ vi.clearAllMocks();
+ process.env['OPENAI_API_KEY'] = 'test-key'; // removed again in afterEach
+
+ mocks = buildMocks();
+
+ mockLoopInstance = { run: vi.fn().mockResolvedValue(mockLoopResult) }; // default outcome; tests override it with *Once
+ vi.mocked(ReasoningLoop).mockImplementation(() => mockLoopInstance as unknown as ReasoningLoop); // every constructed loop is this stub
+ vi.mocked(createProvider).mockReturnValue(mockProvider);
+
+ service = new AgentRunnerService(
+ mocks.mockSessionManager as unknown as SessionManagerService,
+ mocks.mockContainerRunner as unknown as ContainerRunner,
+ mocks.mockContainerPool as unknown as ContainerPoolService,
+ mocks.mockTokenCounter as unknown as TokenCounterService,
+ mocks.mockAgentRunRepo as unknown as AgentRunRepository,
+ mocks.mockAgentDefRepo as unknown as AgentDefinitionRepository,
+ mocks.mockUserRepo as unknown as UserRepository,
+ mocks.mockUserAgentRepo as unknown as UserAgentRepository,
+ mocks.mockMemoryConsolidation as unknown as MemoryConsolidationService,
+ mocks.mockContextBuilder as unknown as ContextBuilderService,
+ {} as unknown as SearchProviderRegistry,
+ { get: () => mocks.mockTaskExecutor } as unknown as import('@nestjs/core').ModuleRef,
+ {} as unknown as import('../../prisma/prisma.service.js').PrismaService,
+ {
+ findVisibleToUser: vi.fn().mockResolvedValue([]),
+ } as unknown as import('../../db/memory-item.repository.js').MemoryItemRepository,
+ mocks.mockWorkspaceSeeder as unknown as import('../workspace-seeder.service.js').WorkspaceSeederService,
+ mocks.mockPolicyRepo as unknown as import('../../db/policy.repository.js').PolicyRepository,
+ {} as unknown as import('../../db/channel.repository.js').ChannelRepository,
+ mocks.mockTaskRepo as unknown as import('../../db/task.repository.js').TaskRepository,
+ mocks.mockCronGuardService as unknown as import('../cron-guard.service.js').CronGuardService,
+ mocks.mockProviderConfig as unknown as import('../../provider-config/provider-config.service.js').ProviderConfigService,
+ {
+ findByTaskIdWithLimit: vi.fn().mockResolvedValue([]),
+ } as unknown as import('../../db/task-run.repository.js').TaskRunRepository,
+ {
+ findByTaskRunId: vi.fn().mockResolvedValue([]),
+ } as unknown as import('../../db/task-run-message.repository.js').TaskRunMessageRepository,
+ mocks.mockSystemSettings as unknown as import('../../system-settings/system-settings.service.js').SystemSettingsService,
+ { compress: vi.fn() } as unknown as import('../compressor.js').CompressorService, // compressor stub, passed as the last constructor argument
+ );
+ });
+
+ afterEach(() => {
+ delete process.env['OPENAI_API_KEY']; // NOTE(review): removed unconditionally, even if the key was set before the suite ran
+ });
+
+ const defaultOptions: RunOptions = {
+ agentDefinitionId: 'agent-def-1',
+ input: 'Hello!',
+ userId: 'user-1',
+ };
+
+ // ---------------------------------------------------------------- //
+ // Recovery test 1: transient 503 recovered internally by loop //
+ // ---------------------------------------------------------------- //
+
+ it('recovers from a transient 503 and completes the run', async () => {
+ // No 503 is actually raised: ReasoningLoop is mocked and resolves directly.
+ // The stub stands in for the outcome after runWithRecovery has retried, so
+ // this only verifies that such a result is reported as a completed run.
+ mockLoopInstance.run.mockResolvedValueOnce({
+ content: 'recovered',
+ messages: [
+ { role: 'system' as const, content: 'You are a helpful assistant.' },
+ { role: 'user' as const, content: 'Hello!' },
+ { role: 'assistant' as const, content: 'recovered' },
+ ],
+ totalUsage: { inputTokens: 100, outputTokens: 20, totalTokens: 120 },
+ iterations: 2,
+ hitMaxIterations: false,
+ });
+
+ const result = await service.run(defaultOptions);
+
+ expect(result.status).toBe('completed');
+ expect(result.output).toContain('recovered');
+ expect(mockLoopInstance.run).toHaveBeenCalledTimes(1); // the service itself does not call the loop again
+ });
+
+ // ---------------------------------------------------------------- //
+ // Recovery test 2: LoopAbortedError surfaces as failed run //
+ // ---------------------------------------------------------------- //
+
+ it('surfaces loop_aborted when the same tool fails 3× in a row', async () => {
+ const { LoopAbortedError: LoopAbortedErrorCtor } = await import('../error-classifier.js'); // loaded lazily inside the test
+ const loopAbortedErr = new LoopAbortedErrorCtor('web_search', { q: 'x' });
+ mockLoopInstance.run.mockRejectedValueOnce(loopAbortedErr); // the loop stub rejects directly; no tool failures are simulated
+
+ await expect(service.run(defaultOptions)).rejects.toMatchObject({ // the error propagates to the caller
+ name: 'LoopAbortedError',
+ });
+
+ expect(mocks.mockAgentRunRepo.update).toHaveBeenCalledWith( // and the run record is updated with status 'failed'
+ 'run-1',
+ expect.objectContaining({ status: 'failed' }),
+ );
+ });
+});
diff --git a/packages/api/src/engine/__tests__/compressor.test.ts b/packages/api/src/engine/__tests__/compressor.test.ts
new file mode 100644
index 0000000..6c69da7
--- /dev/null
+++ b/packages/api/src/engine/__tests__/compressor.test.ts
@@ -0,0 +1,276 @@
+import { describe, expect, it, vi } from 'vitest';
+import type { ChatMessage } from '@clawix/shared';
+import { CompressorService } from '../compressor.js';
+
+interface MockDeps { // vi.fn() doubles that buildService() wires into CompressorService
+  resolveProvider: ReturnType<typeof vi.fn>;
+  systemSettingsGet: ReturnType<typeof vi.fn>;
+  chat: ReturnType<typeof vi.fn>;
+}
+
+function makeMockDeps(overrides: Partial<MockDeps> = {}): MockDeps {
+  return { // defaults: provider lookup resolves, settings are empty, chat yields a canned summary
+    resolveProvider: vi.fn().mockResolvedValue({ apiKey: 'k', apiBaseUrl: null }),
+    systemSettingsGet: vi.fn().mockResolvedValue({}),
+    chat: vi.fn().mockResolvedValue({ content: 'summary text', toolCalls: [], usage: {} }),
+    ...overrides, // spread last so a caller-supplied mock replaces the matching default
+  };
+}
+
+// Fixture helper: wraps `text` in a user-role ChatMessage.
+const userMsg = (text: string): ChatMessage =>
+  ({ role: 'user', content: text }) as ChatMessage;
+// Fixture helper: wraps `text` in an assistant-role ChatMessage.
+const asstMsg = (text: string): ChatMessage =>
+  ({ role: 'assistant', content: text }) as ChatMessage;
+
+function buildService(deps: MockDeps): CompressorService {
+  const resolverStub = { resolveProvider: deps.resolveProvider } as never;
+  const settingsStub = { get: deps.systemSettingsGet } as never;
+  const factoryStub = { create: () => ({ chat: deps.chat, name: 'mock' }) } as never;
+  // Stubs are cast to `never` so only the members these tests touch need to exist.
+  return new CompressorService(resolverStub, settingsStub, factoryStub);
+}
+
+describe('CompressorService', () => {
+ describe('boundary detection', () => {
+ it('preserves the last 2 user-message cycles verbatim', async () => {
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 'sys' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ asstMsg('a3'),
+ userMsg('u4'), // 2nd-to-last user
+ asstMsg('a4'),
+ userMsg('u5'), // last user
+ asstMsg('a5'),
+ ];
+ const result = await svc.compress(messages, { provider: 'anthropic', model: 'm' });
+ // System + summary header + last 2 cycles = 6 messages
+ expect(result.length).toBe(6);
+ expect(result[0]!.role).toBe('system'); // original system prompt preserved
+ expect(result[1]!.role).toBe('system'); // synthetic summary header
+ expect((result[1]! as { content: string }).content).toContain('summary');
+ expect((result[2]! as { content: string }).content).toBe('u4');
+ expect((result[5]! as { content: string }).content).toBe('a5');
+ });
+
+ it('returns messages unchanged when there are fewer than 2 user cycles', async () => {
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 'sys' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ ];
+ const result = await svc.compress(messages, { provider: 'anthropic', model: 'm' });
+ expect(result).toEqual(messages);
+ expect(deps.chat).not.toHaveBeenCalled();
+ });
+ });
+
+ describe('compression model resolution', () => {
+ it('uses systemSettings.compressionModel when set', async () => {
+ const deps = makeMockDeps({
+ systemSettingsGet: vi
+ .fn()
+ .mockResolvedValue({ compressionModel: { provider: 'openai', model: 'gpt-mini' } }),
+ resolveProvider: vi.fn().mockResolvedValue({ apiKey: 'kk', apiBaseUrl: null }),
+ });
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 's' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ asstMsg('a3'),
+ ];
+ await svc.compress(messages, { provider: 'anthropic', model: 'sonnet' });
+ expect(deps.resolveProvider).toHaveBeenCalledWith('openai');
+ });
+
+ it('falls back to fallbackProviderModel when compressionModel is unset', async () => {
+ const deps = makeMockDeps({ systemSettingsGet: vi.fn().mockResolvedValue({}) });
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 's' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ asstMsg('a3'),
+ ];
+ await svc.compress(messages, { provider: 'anthropic', model: 'sonnet' });
+ expect(deps.resolveProvider).toHaveBeenCalledWith('anthropic');
+ });
+
+ it('falls back when compressionModel resolves but provider lookup fails', async () => {
+ const deps = makeMockDeps({
+ systemSettingsGet: vi
+ .fn()
+ .mockResolvedValue({ compressionModel: { provider: 'deleted-provider', model: 'm' } }),
+ });
+ let firstCall = true;
+ deps.resolveProvider.mockImplementation(async (name: string) => {
+ if (firstCall && name === 'deleted-provider') {
+ firstCall = false;
+ throw new Error('No provider config found');
+ }
+ return { apiKey: 'k', apiBaseUrl: null };
+ });
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 's' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ asstMsg('a3'),
+ ];
+ await svc.compress(messages, { provider: 'anthropic', model: 'sonnet' });
+ // First call to deleted-provider failed; fallback to anthropic was called.
+ expect(deps.resolveProvider).toHaveBeenCalledWith('anthropic');
+ });
+ });
+
+ describe('tool-use/tool-result pairing safety', () => {
+ it('handles cross-cut tool_use/tool_result by expanding boundary backward', async () => {
+ // Construct a case where the naive boundary cuts a tool_use from its tool_result:
+ // index 0: system
+ // 1: u1
+ // 2: a1
+ // 3: u2
+ // 4: a2 with toolCalls=[tc-X]
+ // 5: u3 <- naive 2nd-to-last user (boundary candidate)
+ // 6: tool toolCallId=tc-X <- cross-cut: tool_use on older side, result on kept side
+ // 7: a3
+ // 8: u4 <- last user
+ // 9: a4
+ //
+ // Expected: boundary expands back to u2 (index 3), so tc-X's pair lands entirely
+ // on the kept side (both a2[tc-X] and tool(tc-X) are preserved verbatim, not summarized).
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 'sys' } as ChatMessage, // 0
+ userMsg('u1'), // 1
+ asstMsg('a1'), // 2
+ userMsg('u2'), // 3
+ {
+ role: 'assistant',
+ content: 'fetching',
+ toolCalls: [{ id: 'tc-X', name: 'web_fetch', arguments: { url: 'http://x' } }],
+ } as ChatMessage, // 4
+ userMsg('u3'), // 5 <- naive boundary
+ { role: 'tool', content: 'result', toolCallId: 'tc-X' } as ChatMessage, // 6 cross-cut
+ asstMsg('a3'), // 7
+ userMsg('u4'), // 8
+ asstMsg('a4'), // 9
+ ];
+ const result = await svc.compress(messages, { provider: 'anthropic', model: 'm' });
+ // Boundary expands to u2 (index 3). Output: [sys, summary, u2, a2[tc-X], u3, tool(tc-X), a3, u4, a4]
+ // = 1 (system) + 1 (summary) + 7 (kept: indices 3-9) = 9
+ expect(result.length).toBe(9);
+ expect(result[0]!.role).toBe('system');
+ expect(result[1]!.role).toBe('system');
+ expect((result[1]! as { content: string }).content).toContain('summary');
+ expect((result[2]! as { content: string }).content).toBe('u2');
+ expect((result[8]! as { content: string }).content).toBe('a4');
+ });
+
+ it('does not expand boundary when tool_use/tool_result pair is entirely on kept side', async () => {
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ // u1, a1, u2, a2, u3, a3[tc-Y], tool(tc-Y), a3-final, u4, a4[tc-Z], tool(tc-Z), a4-final, u5, a5
+ // Naive boundary = u4 (2nd-to-last user). a4[tc-Z] and tool(tc-Z) are both on kept side — safe.
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 'sys' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ {
+ role: 'assistant',
+ content: 'older-tool',
+ toolCalls: [{ id: 'tc-Y', name: 't', arguments: {} }],
+ } as ChatMessage,
+ { role: 'tool', content: 'res-Y', toolCallId: 'tc-Y' } as ChatMessage,
+ asstMsg('a3-final'),
+ userMsg('u4'), // boundary (2nd-to-last)
+ {
+ role: 'assistant',
+ content: 'new-tool',
+ toolCalls: [{ id: 'tc-Z', name: 't', arguments: {} }],
+ } as ChatMessage,
+ { role: 'tool', content: 'res-Z', toolCallId: 'tc-Z' } as ChatMessage,
+ asstMsg('a4-final'),
+ userMsg('u5'), // last user
+ asstMsg('a5'),
+ ];
+ const result = await svc.compress(messages, { provider: 'anthropic', model: 'm' });
+ // Boundary at u4 (index 9). Kept side: u4..a5 = 6 messages.
+ // Output: system + summary + 6 kept = 8
+ expect(result.length).toBe(8);
+ expect((result[2]! as { content: string }).content).toBe('u4');
+ expect((result[7]! as { content: string }).content).toBe('a5');
+ });
+ });
+
+ describe('system message dedup', () => {
+ it('does not duplicate system messages that live at or after the boundary', async () => {
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 'sys-1' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ { role: 'system', content: 'sys-2' } as ChatMessage, // injected mid-conversation
+ userMsg('u3'),
+ asstMsg('a3'),
+ ];
+ const result = await svc.compress(messages, { provider: 'anthropic', model: 'm' });
+ // System messages in output: sys-1, sys-2 (both from original), plus synthetic summary.
+ const systemMessages = result.filter((m) => m.role === 'system');
+ expect(systemMessages.length).toBe(3);
+ const systemContents = systemMessages.map((m) => (m as { content: string }).content);
+ // sys-2 should appear exactly once (not duplicated from afterBoundary)
+ expect(systemContents.filter((c) => c === 'sys-2')).toHaveLength(1);
+ });
+ });
+
+ describe('summarizer call', () => {
+ it('passes microcompacted older messages to the summarizer', async () => {
+ const deps = makeMockDeps();
+ const svc = buildService(deps);
+ const messages: ChatMessage[] = [
+ { role: 'system', content: 's' } as ChatMessage,
+ userMsg('u1'),
+ asstMsg('a1'),
+ userMsg('u2'),
+ asstMsg('a2'),
+ userMsg('u3'),
+ asstMsg('a3'),
+ ];
+ await svc.compress(messages, { provider: 'anthropic', model: 'sonnet' });
+ expect(deps.chat).toHaveBeenCalledTimes(1);
+ const [chatMessages, chatOpts] = deps.chat.mock.calls[0]!;
+ expect(Array.isArray(chatMessages)).toBe(true);
+ // Summarizer receives a system+user prompt pair built from older messages
+ expect(chatMessages.length).toBeGreaterThanOrEqual(2);
+ expect((chatMessages[0] as { role: string }).role).toBe('system');
+ void chatOpts;
+ });
+ });
+});
diff --git a/packages/api/src/engine/__tests__/error-classifier.test.ts b/packages/api/src/engine/__tests__/error-classifier.test.ts
new file mode 100644
index 0000000..677de94
--- /dev/null
+++ b/packages/api/src/engine/__tests__/error-classifier.test.ts
@@ -0,0 +1,162 @@
+import { describe, expect, it } from 'vitest';
+import { classifyError, LoopAbortedError } from '../error-classifier.js';
+
+function err(message: string, extra: Record<string, unknown> = {}): Error {
+  const e = new Error(message);
+  Object.assign(e, extra); // copies ad-hoc fields (e.g. `code`, `cause`) onto the Error
+  return e;
+}
+
+describe('classifyError', () => {
+ describe('provider transient', () => {
+ it('classifies ECONNREFUSED as network', () => {
+ const r = classifyError(err('fetch failed', { code: 'ECONNREFUSED' }));
+ expect(r.category).toBe('network');
+ expect(r.flags.retryable).toBe(true);
+ });
+ it('classifies undici body-timeout as network', () => {
+ const r = classifyError(err('UND_ERR_BODY_TIMEOUT'));
+ expect(r.category).toBe('network');
+ });
+ it('classifies "504 deadline exceeded" as timeout', () => {
+ const r = classifyError(err('status 504 deadline exceeded'));
+ expect(r.category).toBe('timeout');
+ expect(r.flags.retryable).toBe(true);
+ });
+ it('classifies "503 overloaded" as overloaded', () => {
+ const r = classifyError(err('status 503 overloaded_error'));
+ expect(r.category).toBe('overloaded');
+ expect(r.flags.retryable).toBe(true);
+ });
+ it('classifies "500 internal server error" as server_error', () => {
+ const r = classifyError(err('500 Internal Server Error'));
+ expect(r.category).toBe('server_error');
+ expect(r.flags.retryable).toBe(true);
+ });
+ it('classifies 429 as rate_limit', () => {
+ const r = classifyError(err('status 429 too many requests'));
+ expect(r.category).toBe('rate_limit');
+ expect(r.flags.retryable).toBe(true);
+ });
+ });
+
+ describe('provider permanent (recovery deferred)', () => {
+ it('classifies 401 as auth with rotatable flag', () => {
+ const r = classifyError(err('status 401 unauthorized'));
+ expect(r.category).toBe('auth');
+ expect(r.flags.retryable).toBe(false);
+ expect(r.flags.rotatable).toBe(true);
+ });
+ it('classifies 402 / "credit balance" as billing', () => {
+ const r = classifyError(err('402 — your credit balance is too low'));
+ expect(r.category).toBe('billing');
+ expect(r.flags.rotatable).toBe(true);
+ });
+ it('classifies "model_deprecated" as model_not_found', () => {
+ const r = classifyError(err('the model has been model_deprecated'));
+ expect(r.category).toBe('model_not_found');
+ expect(r.flags.fallbackable).toBe(true);
+ });
+ it('classifies "violates our content policy" as provider_policy', () => {
+ const r = classifyError(err('this request violates our content policy'));
+ expect(r.category).toBe('provider_policy');
+ expect(r.flags.fallbackable).toBe(true);
+ });
+ });
+
+ describe('provider permanent (no recovery)', () => {
+ it('classifies "context_length_exceeded" as context_overflow', () => {
+ const r = classifyError(err('400 — context_length_exceeded: 250000 > 200000'));
+ expect(r.category).toBe('context_overflow');
+ expect(r.flags.compressible).toBe(true);
+ });
+ it('classifies 413 as payload_too_large', () => {
+ const r = classifyError(err('status 413 request too large'));
+ expect(r.category).toBe('payload_too_large');
+ expect(r.flags.retryable).toBe(false);
+ expect(r.flags.compressible).toBe(false);
+ });
+ it('classifies 400 (catch-all) as bad_request', () => {
+ const r = classifyError(err('status 400 invalid argument'));
+ expect(r.category).toBe('bad_request');
+ });
+ });
+
+ describe('non-provider', () => {
+ it('classifies "is not allowed by policy" as policy', () => {
+ const r = classifyError(err('action is not allowed by policy'));
+ expect(r.category).toBe('policy');
+ });
+ it('classifies LoopAbortedError as loop_aborted', () => {
+ const r = classifyError(new LoopAbortedError('web_search', { q: 'x' }));
+ expect(r.category).toBe('loop_aborted');
+ });
+ it('classifies a random Error as unknown', () => {
+ const r = classifyError(err('unexpected internal failure'));
+ expect(r.category).toBe('unknown');
+ });
+ });
+
+ describe('ordering', () => {
+ it('keeps auth ahead of network even when message contains "connection"', () => {
+ const r = classifyError(err('status 401 — connection rejected by gateway'));
+ expect(r.category).toBe('auth');
+ });
+ it('keeps context_overflow ahead of bad_request for 400 responses', () => {
+ const r = classifyError(err('400 — context_length_exceeded'));
+ expect(r.category).toBe('context_overflow');
+ });
+ it('keeps provider_policy ahead of bad_request', () => {
+ const r = classifyError(err('400 — content_filter triggered'));
+ expect(r.category).toBe('provider_policy');
+ });
+ });
+
+ describe('error-shape extraction', () => {
+ it('reads .cause one level deep', () => {
+ const inner = err('ECONNRESET', { code: 'ECONNRESET' });
+ const outer = err('fetch failed', { cause: inner });
+ const r = classifyError(outer);
+ expect(r.category).toBe('network');
+ });
+ it('falls back gracefully on non-Error throws', () => {
+ const r = classifyError('plain string error');
+ expect(r.category).toBe('unknown');
+ });
+ it('attaches the original error as .cause', () => {
+ const e = err('boom');
+ const r = classifyError(e);
+ expect(r.cause).toBe(e);
+ });
+ });
+
+ describe('user-safe text', () => {
+ it('never includes the raw message verbatim', () => {
+ const sensitive = err('status 401 — Bearer abc123secret');
+ const r = classifyError(sensitive);
+ expect(r.text).not.toContain('abc123secret');
+ });
+    it('every category produces non-empty text', () => {
+      const samples: Record<string, Error> = { // NOTE(review): no loop_aborted sample — confirm intended
+        network: err('ECONNRESET', { code: 'ECONNRESET' }),
+        timeout: err('504 deadline'),
+        overloaded: err('503 overloaded'),
+        server_error: err('500 internal'),
+        rate_limit: err('429'),
+        auth: err('401'),
+        billing: err('402 credit balance'),
+        model_not_found: err('model_deprecated'),
+        provider_policy: err('content policy violation'),
+        context_overflow: err('context_length_exceeded'),
+        payload_too_large: err('413 request too large'),
+        bad_request: err('400 invalid'),
+        policy: err('not allowed by policy'),
+        unknown: err('boom'),
+      };
+      for (const e of Object.values(samples)) { // keys are labels only; each sample error is classified
+        const r = classifyError(e);
+        expect(r.text.length).toBeGreaterThan(0);
+      }
+    });
+ });
+});
diff --git a/packages/api/src/engine/__tests__/reasoning-loop-timeout.test.ts b/packages/api/src/engine/__tests__/reasoning-loop-timeout.test.ts
index 35cbc89..2e1d8b4 100644
--- a/packages/api/src/engine/__tests__/reasoning-loop-timeout.test.ts
+++ b/packages/api/src/engine/__tests__/reasoning-loop-timeout.test.ts
@@ -5,6 +5,9 @@ import { ReasoningLoop } from '../reasoning-loop.js';
import { ToolRegistry } from '../tool-registry.js';
import type { Tool, ToolResult } from '../tool.js';
+const mockCompressor = { compress: vi.fn() } as never;
+const providerInfo = { provider: 'mock', model: 'test-model' };
+
function createSlowProvider(delayMs: number): LLMProvider {
return {
name: 'test',
@@ -54,7 +57,7 @@ describe('ReasoningLoop timeout', () => {
// start of iteration 2 sees the signal and breaks.
const provider = createSlowProvider(200);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const result = await loop.run(messages, {
@@ -67,7 +70,7 @@ describe('ReasoningLoop timeout', () => {
it('completes normally when within timeout', async () => {
const provider = createSlowProvider(10);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const result = await loop.run(messages, { timeoutMs: 5000 });
@@ -79,7 +82,7 @@ describe('ReasoningLoop timeout', () => {
it('returns hitTimeout false when no timeout configured', async () => {
const provider = createSlowProvider(10);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const result = await loop.run(messages);
@@ -90,7 +93,7 @@ describe('ReasoningLoop timeout', () => {
it('aborts immediately when external signal is already aborted', async () => {
const provider = createSlowProvider(10);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const controller = new AbortController();
@@ -112,7 +115,7 @@ describe('ReasoningLoop timeout', () => {
const tool = makeMockTool('slow_tool', 'result');
const registry = new ToolRegistry();
registry.register(tool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const result = await loop.run(messages, { timeoutMs: 50 });
@@ -127,7 +130,7 @@ describe('ReasoningLoop timeout', () => {
it('aborts when external signal fires during execution', async () => {
const provider = createSlowProvider(200);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const messages: ChatMessage[] = [{ role: 'user', content: 'hello' }];
const controller = new AbortController();
@@ -157,7 +160,7 @@ describe('ReasoningLoop timeout', () => {
}),
};
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
await loop.run([{ role: 'user', content: 'hi' }], { timeoutMs: 5000 });
@@ -182,7 +185,7 @@ describe('ReasoningLoop timeout', () => {
}),
};
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'hi' }], { timeoutMs: 30 });
diff --git a/packages/api/src/engine/__tests__/reasoning-loop.test.ts b/packages/api/src/engine/__tests__/reasoning-loop.test.ts
index 8b2acdc..0fff481 100644
--- a/packages/api/src/engine/__tests__/reasoning-loop.test.ts
+++ b/packages/api/src/engine/__tests__/reasoning-loop.test.ts
@@ -56,6 +56,9 @@ function makeUsage(input: number, output: number): LLMUsage {
/* Tests */
/* ------------------------------------------------------------------ */
+const mockCompressor = { compress: vi.fn() } as never;
+const providerInfo = { provider: 'mock', model: 'test-model' };
+
describe('ReasoningLoop', () => {
it('single-turn (no tool calls): returns model response, 1 iteration', async () => {
const response = createLLMResponse({
@@ -65,7 +68,7 @@ describe('ReasoningLoop', () => {
});
const provider = makeMockProvider([response]);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'Hi' }]);
@@ -94,7 +97,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'Find info' }]);
expect(result.content).toBe('Found the answer.');
@@ -127,7 +130,7 @@ describe('ReasoningLoop', () => {
registry.register(searchTool);
registry.register(readTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'Do stuff' }]);
expect(result.content).toBe('Done.');
@@ -156,7 +159,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'Loop forever' }], {
maxIterations: maxIter,
});
@@ -174,7 +177,7 @@ describe('ReasoningLoop', () => {
});
const provider = makeMockProvider([errorResponse]);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'test' }]);
@@ -191,7 +194,7 @@ describe('ReasoningLoop', () => {
}),
};
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
await expect(loop.run([{ role: 'user', content: 'test' }])).rejects.toThrow('API failure');
});
@@ -215,7 +218,7 @@ describe('ReasoningLoop', () => {
registry.register(searchTool);
const onProgress = vi.fn();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
await loop.run([{ role: 'user', content: 'test' }], { onProgress });
expect(onProgress).toHaveBeenCalledOnce();
@@ -232,7 +235,7 @@ describe('ReasoningLoop', () => {
});
const provider = makeMockProvider([response]);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'Hi' }]);
@@ -248,7 +251,7 @@ describe('ReasoningLoop', () => {
});
const provider = makeMockProvider([response]);
const registry = new ToolRegistry();
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const tracker = new BudgetTracker(100, 10);
const result = await loop.run([{ role: 'user', content: 'Hi' }], {
@@ -281,7 +284,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'test' }], {
budgetTracker: new BudgetTracker(100, 10),
});
@@ -312,7 +315,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
await loop.run([{ role: 'user', content: 'test' }], {
budgetTracker: new BudgetTracker(100, 10),
});
@@ -352,7 +355,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'test' }], {
budgetTracker: new BudgetTracker(100, 10),
});
@@ -380,7 +383,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'test' }], {
budgetTracker: new BudgetTracker(100, 10),
});
@@ -401,7 +404,7 @@ describe('ReasoningLoop', () => {
usage: makeUsage(30, 30),
});
const provider1 = makeMockProvider([firstResp]);
- const loop1 = new ReasoningLoop(provider1, new ToolRegistry());
+ const loop1 = new ReasoningLoop(provider1, new ToolRegistry(), mockCompressor, providerInfo);
await loop1.run([{ role: 'user', content: 'p' }], { budgetTracker: tracker });
expect(tracker.used).toBe(60);
@@ -414,7 +417,7 @@ describe('ReasoningLoop', () => {
usage: makeUsage(40, 30), // 60 + 70 = 130 ≥ 110
});
const provider2 = makeMockProvider([subResp]);
- const loop2 = new ReasoningLoop(provider2, new ToolRegistry());
+ const loop2 = new ReasoningLoop(provider2, new ToolRegistry(), mockCompressor, providerInfo);
const result2 = await loop2.run([{ role: 'user', content: 's' }], {
budgetTracker: tracker,
});
@@ -431,7 +434,7 @@ describe('ReasoningLoop', () => {
usage: makeUsage(10_000_000, 10_000_000), // way past any sane limit
});
const provider = makeMockProvider([response]);
- const loop = new ReasoningLoop(provider, new ToolRegistry());
+ const loop = new ReasoningLoop(provider, new ToolRegistry(), mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'go' }], {
budgetTracker: tracker,
@@ -473,7 +476,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const result = await loop.run([{ role: 'user', content: 'test cache' }]);
expect(result.totalUsage.inputTokens).toBe(15);
@@ -501,7 +504,7 @@ describe('ReasoningLoop', () => {
const registry = new ToolRegistry();
registry.register(searchTool);
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const initialMessages: readonly ChatMessage[] = [{ role: 'user', content: 'question' }];
const result = await loop.run(initialMessages);
@@ -532,7 +535,7 @@ describe('ReasoningLoop', () => {
const provider = makeMockProvider(responses);
const registry = new ToolRegistry();
registry.register(makeMockTool('mock_search', '{"results": []}'));
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const events: ReasoningEvent[] = [];
await loop.run([{ role: 'user', content: 'hi' }], {
@@ -565,7 +568,7 @@ describe('ReasoningLoop', () => {
const provider = makeMockProvider(responses);
const registry = new ToolRegistry();
registry.register(makeMockTool('mock_search', 'ok'));
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const events: ReasoningEvent[] = [];
await loop.run([{ role: 'user', content: 'hi' }], { onEvent: (e) => events.push(e) });
@@ -586,7 +589,7 @@ describe('ReasoningLoop', () => {
const provider = makeMockProvider(responses);
const registry = new ToolRegistry();
registry.register(makeMockTool('mock_search', 'ok'));
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
const order: string[] = [];
await loop.run([{ role: 'user', content: 'hi' }], {
diff --git a/packages/api/src/engine/__tests__/recovery-integration.test.ts b/packages/api/src/engine/__tests__/recovery-integration.test.ts
new file mode 100644
index 0000000..8911184
--- /dev/null
+++ b/packages/api/src/engine/__tests__/recovery-integration.test.ts
@@ -0,0 +1,156 @@
+/**
+ * Recovery integration tests — exercises the real ReasoningLoop with a real
+ * recovery path (runWithRecovery + ToolLoopGuard) by mocking only
+ * provider.chat and toolRegistry.execute.
+ *
+ * These tests intentionally do NOT mock ReasoningLoop itself; they verify
+ * that the retry and loop-abort behaviours work end-to-end through the real
+ * loop implementation.
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import type { LLMProvider, LLMResponse, ChatMessage } from '@clawix/shared';
+import { createLLMResponse } from '@clawix/shared';
+
+import { ReasoningLoop } from '../reasoning-loop.js';
+import { ToolRegistry } from '../tool-registry.js';
+import { LoopAbortedError } from '../error-classifier.js';
+import type { Tool, ToolResult } from '../tool.js';
+
+/* ------------------------------------------------------------------ */
+/* Shared fixtures */
+/* ------------------------------------------------------------------ */
+
+const providerInfo = { provider: 'mock', model: 'test-model' };
+
+/** A CompressorService stub — compression is not exercised in these tests. */
+const mockCompressor = { compress: vi.fn() } as never;
+
+const userMessage: ChatMessage = { role: 'user', content: 'Hello!' };
+
+/** A minimal successful LLMResponse with no tool calls. */
+function makeOkResponse(content = 'recovered'): LLMResponse {
+ return createLLMResponse({
+ content,
+ finishReason: 'stop',
+ usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
+ });
+}
+
+/** An LLMResponse that requests a single tool call. */
+function makeToolCallResponse(
+ toolName: string,
+ args: Record<string, unknown>,
+ id = 'tc-1',
+): LLMResponse {
+ return createLLMResponse({
+ content: null,
+ finishReason: 'tool_use',
+ toolCalls: [{ id, name: toolName, arguments: args }],
+ usage: { inputTokens: 10, outputTokens: 8, totalTokens: 18 },
+ });
+}
+
+/* ------------------------------------------------------------------ */
+/* Helper: build a minimal LLMProvider from a scripted call sequence */
+/* ------------------------------------------------------------------ */
+
+function makeProvider(
+ script: { error?: Error; response?: LLMResponse }[],
+): LLMProvider & { chat: ReturnType<typeof vi.fn> } {
+ let i = 0;
+ const chat = vi.fn().mockImplementation(async () => {
+ const step = script[i++];
+ if (!step) throw new Error('provider script exhausted');
+ if (step.error) throw step.error;
+ return step.response!;
+ });
+ return { name: 'mock', chat } as unknown as LLMProvider & { chat: ReturnType<typeof vi.fn> };
+}
+
+/* ------------------------------------------------------------------ */
+/* Tests */
+/* ------------------------------------------------------------------ */
+
+describe('ReasoningLoop — recovery integration', () => {
+ afterEach(() => {
+ vi.useRealTimers();
+ });
+
+ // ---------------------------------------------------------------- //
+ // Test 1: Recovers from a transient 503 and completes the run //
+ // ---------------------------------------------------------------- //
+
+ it('recovers from a transient 503 and completes the run', async () => {
+ const overloaded = Object.assign(new Error('status 503 overloaded'), { status: 503 });
+
+ const provider = makeProvider([
+ { error: overloaded }, // first call: 503 — triggers retry in runWithRecovery
+ { response: makeOkResponse('recovered') }, // second call: success
+ ]);
+
+ const registry = new ToolRegistry();
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
+
+ // Use fake timers so the 500 ms + jitter backoff resolves instantly.
+ vi.useFakeTimers();
+
+ const promise = loop.run([userMessage]);
+
+ // Advance past the first retry backoff window (default: 500 ms base + up to 250 ms jitter).
+ await vi.advanceTimersByTimeAsync(1000);
+
+ const result = await promise;
+
+ // The provider must have been called twice: once failing, once succeeding.
+ expect(provider.chat).toHaveBeenCalledTimes(2);
+ expect(result.content).toBe('recovered');
+ expect(result.iterations).toBe(1); // one completed loop iteration
+ });
+
+ // ---------------------------------------------------------------- //
+ // Test 2: ToolLoopGuard aborts after 3× identical failing calls //
+ // ---------------------------------------------------------------- //
+
+ it('surfaces LoopAbortedError when the same tool fails 3× in a row', async () => {
+ const toolName = 'web_search';
+ const toolArgs = { q: 'test query' };
+
+ // The provider always requests the same tool call (drives the loop).
+ // We need 3 tool-call responses so the guard can see 3 consecutive failures.
+ const provider = makeProvider([
+ { response: makeToolCallResponse(toolName, toolArgs, 'tc-1') },
+ { response: makeToolCallResponse(toolName, toolArgs, 'tc-2') },
+ { response: makeToolCallResponse(toolName, toolArgs, 'tc-3') },
+ ]);
+
+ // Register a tool that always returns an error result.
+ const failingTool: Tool = {
+ name: toolName,
+ description: 'web search',
+ parameters: {
+ type: 'object',
+ properties: { q: { type: 'string' } },
+ required: ['q'],
+ },
+ execute: vi.fn().mockResolvedValue({
+ output: 'ERROR: service unavailable',
+ isError: true,
+ } satisfies ToolResult),
+ };
+
+ const registry = new ToolRegistry();
+ registry.register(failingTool);
+
+ const loop = new ReasoningLoop(provider, registry, mockCompressor, providerInfo);
+
+ // The ToolLoopGuard fires synchronously inside the loop — no timers needed.
+ await expect(loop.run([userMessage])).rejects.toBeInstanceOf(LoopAbortedError);
+
+ // The tool must have been executed exactly 3 times (the threshold).
+ expect(failingTool.execute).toHaveBeenCalledTimes(3);
+
+ // The provider was called 3 times (once per iteration before guard fired).
+ expect(provider.chat).toHaveBeenCalledTimes(3);
+ });
+});
diff --git a/packages/api/src/engine/__tests__/recovery-loop.test.ts b/packages/api/src/engine/__tests__/recovery-loop.test.ts
new file mode 100644
index 0000000..9e728dc
--- /dev/null
+++ b/packages/api/src/engine/__tests__/recovery-loop.test.ts
@@ -0,0 +1,222 @@
+import { describe, expect, it, vi } from 'vitest';
+import type { ChatMessage, LLMProvider, LLMResponse, ChatOptions } from '@clawix/shared';
+
+import { runWithRecovery } from '../recovery-loop.js';
+import { classifyError } from '../error-classifier.js';
+import { DEFAULT_RECOVERY_CONFIG } from '../recovery-loop.types.js';
+import type { RecoveryEvent, RecoveryConfig } from '../recovery-loop.types.js';
+
+const ZERO_BACKOFF: RecoveryConfig = {
+ ...DEFAULT_RECOVERY_CONFIG,
+ retryBackoffMs: [0, 0, 0],
+};
+
+interface ScriptedProvider {
+ name: string;
+ chat: ReturnType<typeof vi.fn>;
+}
+
+function provider(script: { throw?: Error; return?: LLMResponse }[]): ScriptedProvider {
+ let i = 0;
+ return {
+ name: 'mock',
+ chat: vi.fn().mockImplementation(async () => {
+ const step = script[i++];
+ if (!step) throw new Error('script exhausted');
+ if (step.throw) throw step.throw;
+ return step.return!;
+ }),
+ };
+}
+
+const ok: LLMResponse = { content: 'hi', toolCalls: [], usage: {} } as unknown as LLMResponse;
+
+const baseMessages: ChatMessage[] = [{ role: 'user', content: 'hello' } as ChatMessage];
+
+describe('runWithRecovery', () => {
+ it('returns on success without invoking recovery', async () => {
+ const events: RecoveryEvent[] = [];
+ const p = provider([{ return: ok }]);
+ const result = await runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ ZERO_BACKOFF,
+ );
+ expect(result.response).toBe(ok);
+ expect(events).toHaveLength(0);
+ });
+
+ it('retries a transient error and succeeds', async () => {
+ const events: RecoveryEvent[] = [];
+ const p = provider([
+ { throw: Object.assign(new Error('status 503 overloaded'), { status: 503 }) },
+ { return: ok },
+ ]);
+ const result = await runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ ZERO_BACKOFF,
+ );
+ expect(result.response).toBe(ok);
+ expect(p.chat).toHaveBeenCalledTimes(2);
+ expect(events.map((e) => e.type)).toEqual(['recovery_action', 'recovery_succeeded']);
+ expect(events[0]!.action).toBe('retry');
+ });
+
+ it('exhausts retries and surfaces the last error', async () => {
+ const err = Object.assign(new Error('status 503 overloaded'), { status: 503 });
+ const p = provider([{ throw: err }, { throw: err }, { throw: err }, { throw: err }]);
+ const events: RecoveryEvent[] = [];
+ await expect(
+ runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ ZERO_BACKOFF,
+ ),
+ ).rejects.toBe(err);
+ expect(p.chat).toHaveBeenCalledTimes(4); // 1 initial + 3 retries
+ expect(events.filter((e) => e.type === 'recovery_action')).toHaveLength(3);
+ expect(events.find((e) => e.type === 'recovery_exhausted')!.reason).toBe('no_action');
+ });
+
+ it('compresses and retries on context_overflow', async () => {
+ const overflow = new Error('400 — context_length_exceeded');
+ const p = provider([{ throw: overflow }, { return: ok }]);
+ const compressed: ChatMessage[] = [{ role: 'system', content: 'compressed' } as ChatMessage];
+ const compressor = vi.fn().mockResolvedValue(compressed);
+ const events: RecoveryEvent[] = [];
+ const result = await runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor,
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ ZERO_BACKOFF,
+ );
+ expect(result.response).toBe(ok);
+ expect(result.messages).toBe(compressed);
+ expect(compressor).toHaveBeenCalledTimes(1);
+ expect(events.map((e) => e.action)).toContain('compress');
+ });
+
+ it('surfaces the original error when compressor itself throws', async () => {
+ const overflow = new Error('400 — context_length_exceeded');
+ const compressor = vi.fn().mockRejectedValue(new Error('summarizer 500'));
+ const p = provider([{ throw: overflow }]);
+ const events: RecoveryEvent[] = [];
+ await expect(
+ runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor,
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ ZERO_BACKOFF,
+ ),
+ ).rejects.toBe(overflow);
+ expect(events.find((e) => e.type === 'recovery_exhausted')!.reason).toBe('compress_failed');
+ });
+
+ it('honors abortSignal and skips recovery', async () => {
+ const ac = new AbortController();
+ const p: ScriptedProvider = {
+ name: 'mock',
+ chat: vi.fn().mockImplementation(async () => {
+ ac.abort();
+ throw new Error('status 503 overloaded');
+ }),
+ };
+ await expect(
+ runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ { abortSignal: ac.signal } as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ },
+ ZERO_BACKOFF,
+ ),
+ ).rejects.toThrow('status 503');
+ expect(p.chat).toHaveBeenCalledTimes(1); // no retry attempted
+ });
+
+ it('aborts during retry backoff and surfaces the original error', async () => {
+ const ac = new AbortController();
+ const p = provider([
+ { throw: Object.assign(new Error('status 503'), { status: 503 }) },
+ // Second call would succeed but should never be reached.
+ { return: ok },
+ ]);
+ // Use a config with a real (non-zero) backoff so the abort can fire mid-sleep.
+ const slowBackoff: RecoveryConfig = {
+ ...DEFAULT_RECOVERY_CONFIG,
+ retryBackoffMs: [200, 200, 200],
+ };
+ const events: RecoveryEvent[] = [];
+ // Schedule abort 50ms in — well before backoff completes.
+ setTimeout(() => ac.abort(), 50);
+ await expect(
+ runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ { abortSignal: ac.signal } as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ slowBackoff,
+ ),
+ ).rejects.toThrow('status 503');
+ expect(p.chat).toHaveBeenCalledTimes(1); // only the first call ran
+ // Recovery action was emitted (we did start the retry path), but no second chat call.
+ expect(events.filter((e) => e.type === 'recovery_action')).toHaveLength(1);
+ });
+
+ it('respects globalCap as a safety net', async () => {
+ const tight: RecoveryConfig = { ...ZERO_BACKOFF, globalCap: 2, maxRetries: 5 };
+ const err = Object.assign(new Error('status 503'), { status: 503 });
+ const p = provider([{ throw: err }, { throw: err }, { throw: err }]);
+ const events: RecoveryEvent[] = [];
+ await expect(
+ runWithRecovery(
+ p as unknown as LLMProvider,
+ baseMessages,
+ {} as ChatOptions,
+ {
+ classifier: classifyError,
+ compressor: vi.fn(),
+ onRecoveryEvent: (e) => events.push(e),
+ },
+ tight,
+ ),
+ ).rejects.toBe(err);
+ const exhausted = events.find((e) => e.type === 'recovery_exhausted')!;
+ expect(exhausted.reason).toBe('global_cap');
+ });
+});
diff --git a/packages/api/src/engine/__tests__/recovery-metrics.test.ts b/packages/api/src/engine/__tests__/recovery-metrics.test.ts
new file mode 100644
index 0000000..ffacdce
--- /dev/null
+++ b/packages/api/src/engine/__tests__/recovery-metrics.test.ts
@@ -0,0 +1,73 @@
+import { beforeEach, describe, expect, it } from 'vitest';
+import {
+ agentErrorTotal,
+ recoveryActionTotal,
+ recoveryOutcomeTotal,
+ toolLoopAbortedTotal,
+ wireRecoveryMetrics,
+} from '../recovery-metrics.js';
+import type { RecoveryEvent } from '../recovery-loop.types.js';
+
+function getCount(metric: { hashMap: Record<string, { value: number }> }): number {
+ return Object.values(metric.hashMap).reduce((sum, entry) => sum + entry.value, 0);
+}
+
+describe('recovery-metrics', () => {
+ beforeEach(() => {
+ agentErrorTotal.reset();
+ recoveryActionTotal.reset();
+ recoveryOutcomeTotal.reset();
+ toolLoopAbortedTotal.reset();
+ });
+
+ it('increments recovery_action_total on a recovery_action event', () => {
+ const event: RecoveryEvent = {
+ type: 'recovery_action',
+ action: 'retry',
+ category: 'overloaded',
+ attempt: 1,
+ delayMs: 500,
+ provider: 'anthropic',
+ };
+ wireRecoveryMetrics(event);
+ const m = recoveryActionTotal as unknown as { hashMap: Record<string, { value: number }> };
+ expect(getCount(m)).toBe(1);
+ });
+
+ it('increments recovery_outcome_total on a recovery_succeeded event', () => {
+ wireRecoveryMetrics({
+ type: 'recovery_succeeded',
+ category: 'rate_limit',
+ attempt: 2,
+ action: 'retry',
+ provider: 'openai',
+ });
+ const m = recoveryOutcomeTotal as unknown as { hashMap: Record<string, { value: number }> };
+ expect(getCount(m)).toBe(1);
+ });
+
+ it('increments recovery_outcome_total with the reason on recovery_exhausted', () => {
+ wireRecoveryMetrics({
+ type: 'recovery_exhausted',
+ category: 'context_overflow',
+ attempt: 1,
+ reason: 'compress_failed',
+ provider: 'anthropic',
+ });
+ const m = recoveryOutcomeTotal as unknown as { hashMap: Record<string, { value: number }> };
+ const entries = Object.values(m.hashMap);
+ expect(entries).toHaveLength(1);
+ expect(entries[0]!.value).toBe(1);
+ });
+
+ it('does not throw on events with missing optional fields', () => {
+ expect(() =>
+ wireRecoveryMetrics({
+ type: 'recovery_action',
+ action: 'compress',
+ category: 'context_overflow',
+ attempt: 1,
+ }),
+ ).not.toThrow();
+ });
+});
diff --git a/packages/api/src/engine/__tests__/resilience.test.ts b/packages/api/src/engine/__tests__/resilience.test.ts
deleted file mode 100644
index 5e452a2..0000000
--- a/packages/api/src/engine/__tests__/resilience.test.ts
+++ /dev/null
@@ -1,186 +0,0 @@
-import { describe, expect, it, vi } from 'vitest';
-import type { ChatMessage, ChatOptions, LLMProvider, LLMResponse } from '@clawix/shared';
-import { ResilientLLMProvider, DEFAULT_RETRY_CONFIG, isTransientError } from '../resilience.js';
-
-/* ------------------------------------------------------------------ */
-/* Helpers */
-/* ------------------------------------------------------------------ */
-
-function makeMockResponse(overrides: Partial<LLMResponse> = {}): LLMResponse {
- return {
- content: 'Hello',
- toolCalls: [],
- finishReason: 'stop',
- usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
- thinkingBlocks: null,
- ...overrides,
- };
-}
-
-/**
- * Creates an LLMProvider mock that fails `failCount` times with the given
- * error message, then succeeds with the given response.
- */
-function makeFlakeyProvider(
- failCount: number,
- errorMessage: string,
- successResponse: LLMResponse = makeMockResponse(),
-): LLMProvider {
- let callCount = 0;
- return {
- name: 'mock-flakey',
- chat: vi.fn(async () => {
- callCount += 1;
- if (callCount <= failCount) {
- throw new Error(errorMessage);
- }
- return successResponse;
- }),
- };
-}
-
-/** Zero-delay retry config for fast tests. */
-const FAST_RETRY_CONFIG = {
- maxRetries: 3,
- backoffMs: [0, 0, 0] as number[],
-};
-
-/* ------------------------------------------------------------------ */
-/* ResilientLLMProvider */
-/* ------------------------------------------------------------------ */
-
-describe('ResilientLLMProvider', () => {
- const messages: readonly ChatMessage[] = [{ role: 'user', content: 'Hello' }];
- const options: ChatOptions = { model: 'claude-3-5-haiku-20241022' };
-
- it('delegates name to inner provider', () => {
- const inner: LLMProvider = { name: 'my-provider', chat: vi.fn() };
- const resilient = new ResilientLLMProvider(inner);
- expect(resilient.name).toBe('my-provider');
- });
-
- it('returns response on success without retry', async () => {
- const response = makeMockResponse({ content: 'Success!' });
- const inner: LLMProvider = { name: 'mock', chat: vi.fn().mockResolvedValue(response) };
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- const result = await resilient.chat(messages, options);
-
- expect(result).toBe(response);
- expect(inner.chat).toHaveBeenCalledTimes(1);
- });
-
- it('retries on transient error and succeeds', async () => {
- const response = makeMockResponse({ content: 'Recovered' });
- const inner = makeFlakeyProvider(2, 'status 429 rate limited', response);
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- const result = await resilient.chat(messages, options);
-
- expect(result).toBe(response);
- // 2 failures + 1 success = 3 calls total
- expect(inner.chat).toHaveBeenCalledTimes(3);
- });
-
- it('throws after exhausting all retries', async () => {
- const inner = makeFlakeyProvider(10, 'status 503 service unavailable');
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- await expect(resilient.chat(messages, options)).rejects.toThrow('status 503');
- // 1 initial attempt + 3 retries = 4 calls
- expect(inner.chat).toHaveBeenCalledTimes(4);
- });
-
- it('does NOT retry on non-transient error', async () => {
- const inner: LLMProvider = {
- name: 'mock',
- chat: vi.fn().mockRejectedValue(new Error('Invalid API key')),
- };
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- await expect(resilient.chat(messages, options)).rejects.toThrow('Invalid API key');
- // Should not retry — only 1 call
- expect(inner.chat).toHaveBeenCalledTimes(1);
- });
-
- it('passes messages and options through to inner provider', async () => {
- const response = makeMockResponse();
- const inner: LLMProvider = { name: 'mock', chat: vi.fn().mockResolvedValue(response) };
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- await resilient.chat(messages, options);
-
- expect(inner.chat).toHaveBeenCalledWith(messages, options);
- });
-
- it('does not retry when the caller has already aborted the request', async () => {
- // A network-style error message that would normally be retried
- // ("connection") must NOT trigger retries when the abort signal is
- // already set — the failure is intentional cancellation.
- const inner: LLMProvider = {
- name: 'mock',
- chat: vi.fn().mockRejectedValue(new Error('connection reset')),
- };
- const resilient = new ResilientLLMProvider(inner, FAST_RETRY_CONFIG);
-
- const controller = new AbortController();
- controller.abort();
-
- await expect(
- resilient.chat(messages, { ...options, abortSignal: controller.signal }),
- ).rejects.toThrow('connection reset');
- expect(inner.chat).toHaveBeenCalledTimes(1);
- });
-});
-
-/* ------------------------------------------------------------------ */
-/* isTransientError */
-/* ------------------------------------------------------------------ */
-
-describe('isTransientError', () => {
- it.each(['status 429 rate limited', 'rate limit exceeded', 'rate_limit_exceeded'])(
- 'matches rate-limit pattern: %s',
- (msg) => {
- expect(isTransientError(msg)).toBe(true);
- },
- );
-
- it.each(['status 500', 'status 502', 'status 503', 'status 504'])(
- 'matches server error status: %s',
- (msg) => {
- expect(isTransientError(msg)).toBe(true);
- },
- );
-
- it.each(['ETIMEDOUT', 'ECONNRESET', 'request timeout'])(
- 'matches network/timeout pattern: %s',
- (msg) => {
- expect(isTransientError(msg)).toBe(true);
- },
- );
-
- it.each(['Invalid API key', '401 unauthorized', '403 forbidden'])(
- 'does NOT match non-transient pattern: %s',
- (msg) => {
- expect(isTransientError(msg)).toBe(false);
- },
- );
-});
-
-/* ------------------------------------------------------------------ */
-/* DEFAULT_RETRY_CONFIG */
-/* ------------------------------------------------------------------ */
-
-describe('DEFAULT_RETRY_CONFIG', () => {
- it('has maxRetries >= 2', () => {
- expect(DEFAULT_RETRY_CONFIG.maxRetries).toBeGreaterThanOrEqual(2);
- });
-
- it('has backoffMs array with length == maxRetries', () => {
- expect(DEFAULT_RETRY_CONFIG.backoffMs).toHaveLength(DEFAULT_RETRY_CONFIG.maxRetries);
- });
-
- it('has at least one transient pattern', () => {
- expect(DEFAULT_RETRY_CONFIG.transientPatterns.length).toBeGreaterThan(0);
- });
-});
diff --git a/packages/api/src/engine/__tests__/tool-loop-guard.test.ts b/packages/api/src/engine/__tests__/tool-loop-guard.test.ts
new file mode 100644
index 0000000..aa83b04
--- /dev/null
+++ b/packages/api/src/engine/__tests__/tool-loop-guard.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from 'vitest';
+import { ToolLoopGuard } from '../tool-loop-guard.js';
+import { LoopAbortedError } from '../error-classifier.js';
+
+describe('ToolLoopGuard', () => {
+ it('does not throw on a single failure', () => {
+ const g = new ToolLoopGuard();
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ });
+
+ it('does not throw on two consecutive identical failures', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ });
+
+ it('throws LoopAbortedError on the third consecutive identical failure', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, true);
+ expect(() => g.record('web_search', { q: 'x' }, true)).toThrow(LoopAbortedError);
+ });
+
+ it('throws with the offending tool name and args attached', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, true);
+ try {
+ g.record('web_search', { q: 'x' }, true);
+ expect.fail('expected throw');
+ } catch (e) {
+ expect(e).toBeInstanceOf(LoopAbortedError);
+ const err = e as LoopAbortedError;
+ expect(err.toolName).toBe('web_search');
+ expect(err.args).toEqual({ q: 'x' });
+ }
+ });
+
+ it('resets on a successful call', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, false); // success
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ // Now we have 2 consecutive failures again — third would throw.
+ expect(() => g.record('web_search', { q: 'x' }, true)).toThrow(LoopAbortedError);
+ });
+
+ it('resets on a different-tool call', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_fetch', { url: 'y' }, true);
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ });
+
+ it('resets on same-tool different-args', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'x' }, true);
+ g.record('web_search', { q: 'y' }, true); // different args
+ expect(() => g.record('web_search', { q: 'x' }, true)).not.toThrow();
+ });
+
+ it('treats reordered keys as identical args', () => {
+ const g = new ToolLoopGuard();
+ g.record('web_search', { q: 'x', limit: 10 }, true);
+ g.record('web_search', { limit: 10, q: 'x' }, true); // same args, key order differs
+ expect(() => g.record('web_search', { q: 'x', limit: 10 }, true)).toThrow(LoopAbortedError);
+ });
+});
diff --git a/packages/api/src/engine/agent-runner.service.ts b/packages/api/src/engine/agent-runner.service.ts
index 1237d3b..44c7618 100644
--- a/packages/api/src/engine/agent-runner.service.ts
+++ b/packages/api/src/engine/agent-runner.service.ts
@@ -13,7 +13,7 @@
* 8. Build initial messages (system + history + user)
* 9. Save user message to session
* 10. Resolve API key from env vars
- * 11. Create LLMProvider via createProvider, wrap with ResilientLLMProvider
+ * 11. Create LLMProvider via createProvider — recovery is handled inside ReasoningLoop
* 12. Start container
* 13. Create ToolRegistry + registerBuiltinTools + register spawn tool
* 14. Create ReasoningLoop
@@ -58,9 +58,9 @@ import type { MessageStore } from './message-store/message-store.js';
import type { Session } from '../generated/prisma/client.js';
import { ProviderConfigService } from '../provider-config/provider-config.service.js';
import { createProvider } from './providers/provider-factory.js';
-import { ResilientLLMProvider } from './resilience.js';
import { MemoryConsolidationService } from './memory-consolidation.service.js';
import { ReasoningLoop } from './reasoning-loop.js';
+import { CompressorService } from './compressor.js';
import { BudgetTracker } from './budget-tracker.js';
import { ToolRegistry } from './tool-registry.js';
import { registerBuiltinTools, registerMemoryTools, registerCronTools } from './tools/index.js';
@@ -114,6 +114,7 @@ export class AgentRunnerService {
private readonly taskRunRepo: TaskRunRepository,
private readonly taskRunMessageRepo: TaskRunMessageRepository,
private readonly systemSettingsService: SystemSettingsService,
+ private readonly compressor: CompressorService,
) {}
/** Lazy accessor to break circular dependency with TaskExecutorService. */
@@ -267,14 +268,13 @@ export class AgentRunnerService {
// Step 10: Resolve provider credentials (DB first, env var fallback)
const resolved = await this.providerConfig.resolveProvider(agentDef.provider);
- // Step 11: Create LLMProvider, wrap with resilience
- const baseProvider = createProvider(
+ // Step 11: Create LLMProvider
+ const provider = createProvider(
agentDef.provider,
resolved.apiKey,
agentDef.apiBaseUrl ?? resolved.apiBaseUrl ?? undefined,
agentDef.model,
);
- const provider = new ResilientLLMProvider(baseProvider);
// Step 12: Resolve workspace path and acquire container
// Prisma returns containerConfig as JsonValue; cast to the shared type
@@ -420,7 +420,10 @@ export class AgentRunnerService {
);
// Step 14: Create ReasoningLoop
- const loop = new ReasoningLoop(provider, registry);
+ const loop = new ReasoningLoop(provider, registry, this.compressor, {
+ provider: agentDef.provider,
+ model: agentDef.model,
+ });
// Step 15: Run loop
// No default wall-clock timeout — let the model finish. The stale run reaper (10 min) is the safety net.
diff --git a/packages/api/src/engine/compressor.ts b/packages/api/src/engine/compressor.ts
new file mode 100644
index 0000000..c66521d
--- /dev/null
+++ b/packages/api/src/engine/compressor.ts
@@ -0,0 +1,239 @@
+/**
+ * CompressorService — produces a compressed messages array when the agent
+ * runner hits context_overflow. Algorithm: split conversation at the
+ * 2nd-to-last user-message boundary; truncate large tool/system content
+ * in the older portion (microcompact-style); summarize via a small LLM
+ * call; emit:
+ * [original system messages] +
+ * [synthetic summary system message] +
+ * [last 2 user-message cycles verbatim]
+ *
+ * If the resulting messages still exceed the model's context window, the
+ * next provider call fails again and the recovery loop's compress budget
+ * (max 1) is exhausted — the original context_overflow error surfaces to
+ * the user. The compressor itself does not know context-window sizes.
+ */
+
+import { Injectable, Optional } from '@nestjs/common';
+import { createLogger } from '@clawix/shared';
+import type { ChatMessage } from '@clawix/shared';
+
+import { buildConsolidationSystemPrompt, buildConsolidationUserPrompt } from './compact-prompt.js';
+import { ProviderConfigService } from '../provider-config/provider-config.service.js';
+import { SystemSettingsService } from '../system-settings/system-settings.service.js';
+import { createProvider as defaultCreateProvider } from './providers/index.js';
+
+const logger = createLogger('engine:compressor');
+
+// Number of most-recent user-message cycles that compress() keeps verbatim.
+const COMPRESSION_KEEP_RECENT_CYCLES = 2;
+// Key read from the SystemSettingsService.get() result to find an optional
+// { provider, model } override for the summarizer call.
+const COMPRESSION_MODEL_SETTING_KEY = 'compressionModel';
+// String content longer than this many characters is replaced by a short
+// placeholder before the older portion is sent to the summarizer.
+const TRUNCATION_THRESHOLD = 500;
+// Only messages with these roles are eligible for placeholder truncation.
+const TRUNCATABLE_ROLES = new Set(['tool', 'system']);
+
+/** Names the provider and model to use for a chat call (here: the summarizer call). */
+interface ProviderModelRef {
+ readonly provider: string;
+ readonly model: string;
+}
+
+/**
+ * Seam for provider construction. `create` has the same signature as the
+ * default `createProvider`, so a substitute factory can be injected into
+ * CompressorService.
+ */
+interface ProviderFactory {
+ readonly create: typeof defaultCreateProvider;
+}
+
+/**
+ * Locate the start of the keep-verbatim tail. Scanning from the newest
+ * message toward the oldest, returns the index of the `keepCycles`-th
+ * message whose role is 'user'. Returns -1 when the scan finishes before
+ * that many user messages have been seen; the caller should then leave
+ * the messages unchanged.
+ */
+function findVerbatimBoundary(messages: readonly ChatMessage[], keepCycles: number): number {
+  let remaining = keepCycles;
+  for (let idx = messages.length - 1; idx >= 0; idx--) {
+    if (messages[idx]!.role !== 'user') continue;
+    remaining -= 1;
+    if (remaining === 0) return idx;
+  }
+  return -1;
+}
+
+/**
+ * Decide whether cutting the conversation at index `b` keeps every
+ * tool_use/tool_result pair on one side of the cut.
+ *
+ * The cut is rejected when either:
+ *   - the first kept message (index `b`) has role 'tool', i.e. it is a
+ *     tool_result whose tool_use lives on the older side; or
+ *   - some assistant message before `b` issued a tool call whose id shows
+ *     up as `toolCallId` on a 'tool' message at index >= b.
+ *
+ * @returns true when neither condition holds.
+ */
+function isBoundarySafe(messages: readonly ChatMessage[], b: number): boolean {
+  if (messages[b]?.role === 'tool') return false;
+
+  // Collect every tool-call id issued on the older side of the cut.
+  const idsIssuedBefore = new Set<string>();
+  let cursor = 0;
+  while (cursor < b) {
+    const older = messages[cursor]!;
+    if (older.role === 'assistant' && older.toolCalls) {
+      for (const call of older.toolCalls) {
+        if (call.id) idsIssuedBefore.add(call.id);
+      }
+    }
+    cursor += 1;
+  }
+  if (idsIssuedBefore.size === 0) return true;
+
+  // Any kept tool_result answering one of those ids means the pair is split.
+  for (let k = b; k < messages.length; k += 1) {
+    const kept = messages[k]!;
+    if (kept.role !== 'tool') continue;
+    if (kept.toolCallId && idsIssuedBefore.has(kept.toolCallId)) return false;
+  }
+  return true;
+}
+
+/**
+ * Pick a verbatim boundary that does not separate a tool_use from its
+ * tool_result. The first candidate comes from `findVerbatimBoundary`; while
+ * the candidate is unsafe it is moved back to the closest earlier user
+ * message. Returns -1 when no candidate exists or none is safe, in which
+ * case the caller should leave the messages unchanged. A candidate of 0 or
+ * -1 from `findVerbatimBoundary` is returned as-is.
+ */
+function findSafeBoundary(messages: readonly ChatMessage[], keepCycles: number): number {
+  let candidate = findVerbatimBoundary(messages, keepCycles);
+  if (candidate <= 0) return candidate;
+
+  for (;;) {
+    if (isBoundarySafe(messages, candidate)) return candidate;
+    // Index 0 has no earlier anchor to fall back to.
+    if (candidate === 0) return -1;
+
+    // Step back to the nearest user message before the current candidate.
+    let earlier = candidate - 1;
+    while (earlier >= 0 && messages[earlier]!.role !== 'user') {
+      earlier -= 1;
+    }
+    if (earlier < 0) return -1;
+    candidate = earlier;
+  }
+}
+
+/**
+ * Build a copy of `messages` in which oversized tool/system content is
+ * swapped for a one-line placeholder recording the original length.
+ * Messages with any other role, non-string content, or content at or under
+ * TRUNCATION_THRESHOLD characters are passed through by reference. The
+ * input array and its messages are never mutated.
+ */
+function truncateLargeContent(messages: readonly ChatMessage[]): readonly ChatMessage[] {
+  const result: ChatMessage[] = [];
+  for (const original of messages) {
+    if (!TRUNCATABLE_ROLES.has(original.role)) {
+      result.push(original);
+      continue;
+    }
+    const text = typeof original.content === 'string' ? original.content : '';
+    if (text.length <= TRUNCATION_THRESHOLD) {
+      result.push(original);
+      continue;
+    }
+    let placeholder: string;
+    if (original.role === 'tool') {
+      placeholder = `[tool result truncated — originally ${text.length} chars]`;
+    } else {
+      placeholder = `[system message truncated — originally ${text.length} chars]`;
+    }
+    result.push({ ...original, content: placeholder } as ChatMessage);
+  }
+  return result;
+}
+
+/**
+ * Render messages as `role: content` lines for the summarizer LLM, one
+ * message per line.
+ *
+ * String content is emitted unchanged. Missing content (null/undefined)
+ * becomes an empty string and any other content value is JSON-encoded, so
+ * the summarizer never receives "[object Object]", "null" or "undefined"
+ * in place of real text.
+ *
+ * @param messages Messages to render, in order.
+ * @returns The rendered lines joined with '\n'.
+ */
+function formatMessagesForSummarizer(messages: readonly ChatMessage[]): string {
+  return messages
+    .map((m) => {
+      const raw: unknown = m.content;
+      let body: string;
+      if (typeof raw === 'string') {
+        body = raw;
+      } else if (raw === null || raw === undefined) {
+        body = '';
+      } else {
+        // JSON.stringify yields undefined for values it cannot encode.
+        body = JSON.stringify(raw) ?? '';
+      }
+      return `${m.role}: ${body}`;
+    })
+    .join('\n');
+}
+
+/**
+ * Injectable service that shrinks a conversation by replacing its older
+ * portion with an LLM-written summary while keeping all system messages and
+ * the most recent user-message cycles verbatim.
+ */
+@Injectable()
+export class CompressorService {
+  private readonly providerFactory: ProviderFactory;
+
+  /**
+   * @param providerConfig Resolves API key / base URL for a provider name.
+   * @param systemSettings Source of the optional `compressionModel` setting.
+   * @param providerFactory Optional factory override; defaults to the
+   *   module's `createProvider`.
+   */
+  constructor(
+    private readonly providerConfig: ProviderConfigService,
+    private readonly systemSettings: SystemSettingsService,
+    @Optional() providerFactory?: ProviderFactory,
+  ) {
+    this.providerFactory = providerFactory ?? { create: defaultCreateProvider };
+  }
+
+  /**
+   * Compress messages into [system + summary + last N cycles verbatim].
+   *
+   * The input array is returned unchanged (same reference) when no safe
+   * boundary exists, when there are too few user cycles, or when nothing
+   * other than system messages precedes the boundary. In that last case a
+   * summary would only add a message and cost an LLM call.
+   *
+   * @param messages Full conversation, oldest first.
+   * @param fallbackProviderModel Provider/model used for the summarizer call
+   *   when the `compressionModel` setting is unset or invalid.
+   * @returns The compressed message list, or `messages` itself when there is
+   *   nothing to compress.
+   * @throws Whatever provider resolution or the summarizer call throws.
+   */
+  async compress(
+    messages: readonly ChatMessage[],
+    fallbackProviderModel: ProviderModelRef,
+  ): Promise<readonly ChatMessage[]> {
+    const boundary = findSafeBoundary(messages, COMPRESSION_KEEP_RECENT_CYCLES);
+    if (boundary <= 0) {
+      // Not enough user cycles to bother compressing, or no safe boundary found.
+      return messages;
+    }
+
+    const systemMessages = messages.filter((m) => m.role === 'system');
+    const beforeBoundary = messages.slice(0, boundary).filter((m) => m.role !== 'system');
+    if (beforeBoundary.length === 0) {
+      // Only system messages precede the boundary: there is nothing to summarize.
+      return messages;
+    }
+    // Filter system messages from afterBoundary to prevent duplication — they are
+    // already captured in systemMessages above and prepended to the output.
+    const afterBoundary = messages.slice(boundary).filter((m) => m.role !== 'system');
+
+    const truncated = truncateLargeContent(beforeBoundary);
+    const modelRef = await this.resolveCompressionModel(fallbackProviderModel);
+    const summaryText = await this.callSummarizer(truncated, modelRef);
+
+    return [
+      ...systemMessages,
+      {
+        role: 'system',
+        content: `[Earlier conversation summary]\n${summaryText}`,
+      } as ChatMessage,
+      ...afterBoundary,
+    ];
+  }
+
+  /* ---------------------------- private helpers ---------------------------- */
+
+  /**
+   * Pick the provider/model for the summarizer call. Uses the
+   * `compressionModel` setting when it is an object with string `provider`
+   * and `model` fields and its provider resolves; otherwise returns
+   * `fallback`. Any error while reading or validating the setting is logged
+   * and treated as "use the fallback".
+   */
+  private async resolveCompressionModel(fallback: ProviderModelRef): Promise<ProviderModelRef> {
+    try {
+      const settings = (await this.systemSettings.get()) as Record<string, unknown>;
+      const cm = settings[COMPRESSION_MODEL_SETTING_KEY];
+      if (
+        cm !== null &&
+        cm !== undefined &&
+        typeof cm === 'object' &&
+        'provider' in cm &&
+        'model' in cm &&
+        typeof (cm as ProviderModelRef).provider === 'string' &&
+        typeof (cm as ProviderModelRef).model === 'string'
+      ) {
+        const ref = cm as ProviderModelRef;
+        // Resolving here only validates that the provider is usable; the
+        // result is discarded and resolved again in callSummarizer.
+        await this.providerConfig.resolveProvider(ref.provider);
+        return ref;
+      }
+    } catch (err) {
+      logger.warn({ err }, 'compressionModel setting invalid — falling back to agent model');
+    }
+    return fallback;
+  }
+
+  /**
+   * Ask the chosen model to summarize the older portion of the conversation.
+   *
+   * @param older Messages to summarize (already truncated by the caller).
+   * @param modelRef Provider/model to call.
+   * @returns The response content, or '' when the response has none.
+   */
+  private async callSummarizer(
+    older: readonly ChatMessage[],
+    modelRef: ProviderModelRef,
+  ): Promise<string> {
+    const resolved = await this.providerConfig.resolveProvider(modelRef.provider);
+    const provider = this.providerFactory.create(
+      modelRef.provider,
+      resolved.apiKey,
+      resolved.apiBaseUrl ?? undefined,
+      modelRef.model,
+    );
+    const sysPrompt = buildConsolidationSystemPrompt('');
+    const formattedChunk = formatMessagesForSummarizer(older);
+    const userPrompt = buildConsolidationUserPrompt(formattedChunk);
+
+    const response = await provider.chat(
+      [
+        { role: 'system', content: sysPrompt } as ChatMessage,
+        { role: 'user', content: userPrompt } as ChatMessage,
+      ],
+      { model: modelRef.model },
+    );
+    return response.content ?? '';
+  }
+}
diff --git a/packages/api/src/engine/engine.module.ts b/packages/api/src/engine/engine.module.ts
index 0915846..a8e1266 100644
--- a/packages/api/src/engine/engine.module.ts
+++ b/packages/api/src/engine/engine.module.ts
@@ -23,6 +23,7 @@ import { ContextBuilderService } from './context-builder.service.js';
import { BootstrapFileService } from './bootstrap-file.service.js';
import { WorkspaceSeederService } from './workspace-seeder.service.js';
import { StaleRunReaperService } from './stale-run-reaper.service.js';
+import { CompressorService } from './compressor.js';
import { SearchProviderRegistry } from './tools/web/search-provider.js';
import { BraveSearchProvider } from './tools/web/providers/brave.js';
import { DuckDuckGoProvider } from './tools/web/providers/duckduckgo.js';
@@ -49,6 +50,7 @@ import { DuckDuckGoProvider } from './tools/web/providers/duckduckgo.js';
CronTaskProcessorService,
CronSchedulerService,
StaleRunReaperService,
+ CompressorService,
{
provide: SkillLoaderService,
useFactory: () => {
diff --git a/packages/api/src/engine/error-classifier.ts b/packages/api/src/engine/error-classifier.ts
new file mode 100644
index 0000000..9c6b738
--- /dev/null
+++ b/packages/api/src/engine/error-classifier.ts
@@ -0,0 +1,262 @@
+/**
+ * Classifies thrown values from the agent runner into categories with
+ * recovery flags and a user-safe display string.
+ *
+ * Provider-agnostic: matches on patterns from Anthropic / OpenAI / Gemini /
+ * generic OpenAI-compat normalized messages plus undici fetch-cause codes.
+ *
+ * `LoopAbortedError` is defined here (not in tool-loop-guard) because the
+ * classifier needs to recognize it via instanceof and tool-loop-guard
+ * imports the symbol from here — avoids a circular import.
+ */
+
+import type { ClassifiedError, ErrorCategory, RecoveryFlags } from './recovery-loop.types.js';
+
+/* ----------------------------- LoopAbortedError ----------------------------- */
+
+/**
+ * Error signalling that a tool kept failing when invoked repeatedly with
+ * identical arguments. `classifyError` recognizes it via `instanceof` and
+ * maps it to the 'loop_aborted' category.
+ */
+export class LoopAbortedError extends Error {
+ /** Name of the tool that kept failing. */
+ readonly toolName: string;
+ /** The arguments the tool was called with, kept as passed to the constructor. */
+ readonly args: unknown;
+ /**
+ * @param toolName Name of the failing tool; also embedded in the message.
+ * @param args Arguments of the failing call.
+ */
+ constructor(toolName: string, args: unknown) {
+ super(`Tool ${toolName} failed repeatedly with identical args; aborting`);
+ // Set explicitly so `name` reads 'LoopAbortedError' rather than 'Error'.
+ this.name = 'LoopAbortedError';
+ this.toolName = toolName;
+ this.args = args;
+ }
+}
+
+/* ----------------------------- Flag tables ----------------------------- */
+
+/** Baseline flag set: no recovery action applies. */
+const NO_FLAGS: RecoveryFlags = {
+  retryable: false,
+  compressible: false,
+  rotatable: false,
+  fallbackable: false,
+};
+
+/**
+ * Recovery flags for every error category. The `Record<ErrorCategory, …>`
+ * annotation makes the compiler reject this table if a category is missing.
+ * Each entry starts from NO_FLAGS and switches on at most one flag.
+ */
+const FLAGS_BY_CATEGORY: Record<ErrorCategory, RecoveryFlags> = {
+  network: { ...NO_FLAGS, retryable: true },
+  timeout: { ...NO_FLAGS, retryable: true },
+  overloaded: { ...NO_FLAGS, retryable: true },
+  server_error: { ...NO_FLAGS, retryable: true },
+  rate_limit: { ...NO_FLAGS, retryable: true },
+  auth: { ...NO_FLAGS, rotatable: true },
+  billing: { ...NO_FLAGS, rotatable: true },
+  model_not_found: { ...NO_FLAGS, fallbackable: true },
+  provider_policy: { ...NO_FLAGS, fallbackable: true },
+  context_overflow: { ...NO_FLAGS, compressible: true },
+  payload_too_large: NO_FLAGS,
+  bad_request: NO_FLAGS,
+  policy: NO_FLAGS,
+  loop_aborted: NO_FLAGS,
+  unknown: NO_FLAGS,
+};
+
+/**
+ * User-facing text for every error category. The `Record<ErrorCategory, …>`
+ * annotation makes the compiler reject this table if a category is missing.
+ * These strings are returned as `ClassifiedError.text`.
+ */
+const USER_MESSAGES: Record<ErrorCategory, string> = {
+  network: "I can't reach the AI provider right now. Please try again in a moment.",
+  timeout: "I can't reach the AI provider right now. Please try again in a moment.",
+  overloaded: 'The AI provider is busy right now. Please try again shortly.',
+  server_error: 'The AI provider hit an internal error. Please try again shortly.',
+  rate_limit: "We've hit a rate limit. Please wait a minute and try again.",
+  auth: 'The AI provider rejected our credentials. An admin needs to check the API key.',
+  billing: "The AI provider's account is out of credits. An admin needs to top up the account.",
+  model_not_found: 'The configured AI model is unavailable. Please contact your administrator.',
+  provider_policy:
+    'Your message was flagged as potentially unsafe by the AI provider. Try rephrasing your request.',
+  context_overflow:
+    'This conversation has grown too long for the AI to process. Please start a new chat or simplify your request.',
+  payload_too_large:
+    'An attached file or image is too large for the AI provider. Please upload a smaller file.',
+  bad_request: "I couldn't process that — the provider rejected the request shape.",
+  policy:
+    "This request isn't allowed by your account's plan or has exceeded its budget. Please contact your administrator.",
+  loop_aborted:
+    'I got stuck retrying the same step. Please rephrase your request or try a different approach.',
+  unknown: 'Something went wrong while processing your message. Please try again.',
+};
+
+/* ----------------------------- Pattern tables ----------------------------- */
+
+// All *_PATTERNS tables below are matched as substrings of the lower-cased
+// error message, so every entry must itself be lower-case. Bare status
+// numbers carry a trailing space ('429 ') so they match "429 Too Many
+// Requests"-style text rather than any message that merely ends in digits.
+
+/** Error `code` values (own or from `cause`) that identify a network failure. */
+const NETWORK_CODES = new Set([
+  'ECONNREFUSED',
+  'ECONNRESET',
+  'ETIMEDOUT',
+  'ENOTFOUND',
+  'EAI_AGAIN',
+  'EPIPE',
+  'EHOSTUNREACH',
+  'ENETUNREACH',
+  'UND_ERR_CONNECT_TIMEOUT',
+  'UND_ERR_HEADERS_TIMEOUT',
+  'UND_ERR_BODY_TIMEOUT',
+  'UND_ERR_SOCKET',
+]);
+
+const NETWORK_PATTERNS = ['fetch failed', 'network error', 'socket hang up', 'und_err_'];
+
+const TIMEOUT_PATTERNS = [
+  'status 504',
+  '504 ',
+  'deadline exceeded',
+  'request timed out',
+  'request timeout',
+];
+
+const OVERLOADED_PATTERNS = [
+  'status 503',
+  '503 ',
+  'status 529',
+  '529 ',
+  'overloaded',
+  'service unavailable',
+];
+
+const SERVER_ERROR_PATTERNS = ['status 500', '500 ', 'status 502', '502 ', 'internal server error'];
+
+const RATE_LIMIT_PATTERNS = [
+  'rate limit',
+  'rate_limit',
+  'quota exceeded',
+  'status 429',
+  '429 ',
+  'too many requests',
+];
+
+const AUTH_PATTERNS = [
+  'auth failed',
+  'unauthorized',
+  'api_key_invalid',
+  'invalid api key',
+  'status 401',
+  '401 ',
+];
+
+const BILLING_PATTERNS = [
+  'status 402',
+  '402 ',
+  'credit balance',
+  'insufficient_quota',
+  'insufficient quota',
+  'out of credits',
+];
+
+const MODEL_NOT_FOUND_PATTERNS = ['model not found', 'model_deprecated', 'model_not_found'];
+
+const PROVIDER_POLICY_PATTERNS = [
+  'unsafe or sensitive content',
+  'safety system',
+  'content policy',
+  'content_policy',
+  'content_filter',
+  'flagged as inappropriate',
+  'violates our usage policy',
+  'violates our content policy',
+];
+
+const CONTEXT_OVERFLOW_PATTERNS = [
+  'context length',
+  'maximum context',
+  'context_length_exceeded',
+  'context window',
+  'too many tokens',
+  // Anthropic phrases its overflow error as "prompt is too long: N tokens > M maximum",
+  // which none of the entries above match.
+  'prompt is too long',
+];
+
+const PAYLOAD_TOO_LARGE_PATTERNS = ['status 413', '413 ', 'request too large', 'payload too large'];
+
+const BAD_REQUEST_PATTERNS = [
+  'request rejected',
+  'invalid argument',
+  'status 400',
+  '400 ',
+  'bad request',
+];
+
+const POLICY_PATTERNS = ['is not allowed by policy', 'token budget exceeded', 'is inactive'];
+
+/* ----------------------------- Helpers ----------------------------- */
+
+/** The parts of a thrown value that classification looks at. */
+interface ErrorShape {
+  /** Error message, with the cause's message appended when one exists. */
+  readonly message: string;
+  /** String `code` from the value itself, else from its `cause`. */
+  readonly code?: string;
+  /** Numeric `status` from the value itself. */
+  readonly status?: number;
+}
+
+/**
+ * Pull message / code / status out of an arbitrary thrown value.
+ *
+ * Accepts Error instances and any other object carrying those fields, so a
+ * thrown plain object such as `{ status: 429, message: '…' }` still
+ * classifies. A thrown string becomes the message; every other primitive
+ * yields an empty message.
+ *
+ * When `cause` is an Error, its string `code` is used if the outer value has
+ * none, and its message is appended to the outer message separated by a
+ * space.
+ *
+ * @param err The thrown value.
+ * @returns The extracted shape; fields that are absent or of the wrong type
+ *   are left undefined (message falls back to '').
+ */
+function extractErrorShape(err: unknown): ErrorShape {
+  if (typeof err === 'string') return { message: err };
+  if (typeof err !== 'object' || err === null) return { message: '' };
+
+  const direct = err as { message?: unknown; code?: unknown; status?: unknown; cause?: unknown };
+  let message = typeof direct.message === 'string' ? direct.message : '';
+  let code: string | undefined = typeof direct.code === 'string' ? direct.code : undefined;
+  const status: number | undefined = typeof direct.status === 'number' ? direct.status : undefined;
+  if (direct.cause instanceof Error) {
+    const cause = direct.cause as { code?: unknown; message?: string };
+    if (!code && typeof cause.code === 'string') code = cause.code;
+    if (cause.message) message = `${message} ${cause.message}`;
+  }
+  return { message, code, status };
+}
+
+/**
+ * Report whether `haystack` contains any of `needles` as a substring.
+ *
+ * A needle that begins with a digit (the bare status-code patterns such as
+ * '500 ') only counts when the character before the match is not itself a
+ * digit. Without that guard, "201500 tokens" would match '500 ' and a
+ * token-count message would be classified as an HTTP 500.
+ *
+ * @param haystack Text to search; callers pass the lower-cased message.
+ * @param needles Substrings to look for.
+ * @returns true on the first acceptable match, false if there is none.
+ */
+function matchesAny(haystack: string, needles: readonly string[]): boolean {
+  const isDigit = (ch: string | undefined): boolean => ch !== undefined && ch >= '0' && ch <= '9';
+  return needles.some((needle) => {
+    if (!isDigit(needle[0])) return haystack.includes(needle);
+    // Numeric needle: walk every occurrence and accept one not glued to a preceding digit.
+    let at = haystack.indexOf(needle);
+    while (at !== -1) {
+      if (at === 0 || !isDigit(haystack[at - 1])) return true;
+      at = haystack.indexOf(needle, at + 1);
+    }
+    return false;
+  });
+}
+
+/**
+ * Assemble a ClassifiedError for `category`, looking up its user-facing
+ * text and recovery flags in the module tables and attaching the original
+ * thrown value as `cause`.
+ */
+function build(category: ErrorCategory, cause: unknown): ClassifiedError {
+  const text = USER_MESSAGES[category];
+  const flags = FLAGS_BY_CATEGORY[category];
+  return { category, text, flags, cause };
+}
+
+/* ----------------------------- Public entry ----------------------------- */
+
+/** One classification rule: a category plus the signals that select it. */
+interface ClassificationRule {
+  readonly category: ErrorCategory;
+  /** Substrings looked for in the lower-cased message. */
+  readonly patterns: readonly string[];
+  /** Numeric `status` values that select this category. */
+  readonly statuses?: readonly number[];
+  /** Error `code` values that select this category. */
+  readonly codes?: ReadonlySet<string>;
+}
+
+/**
+ * Rules in evaluation order; the first rule with any matching signal wins.
+ * Specific categories come before broad ones because provider messages
+ * overlap.
+ */
+const CLASSIFICATION_RULES: readonly ClassificationRule[] = [
+  { category: 'policy', patterns: POLICY_PATTERNS },
+  { category: 'context_overflow', patterns: CONTEXT_OVERFLOW_PATTERNS },
+  { category: 'payload_too_large', patterns: PAYLOAD_TOO_LARGE_PATTERNS, statuses: [413] },
+  { category: 'provider_policy', patterns: PROVIDER_POLICY_PATTERNS },
+  { category: 'model_not_found', patterns: MODEL_NOT_FOUND_PATTERNS },
+  { category: 'auth', patterns: AUTH_PATTERNS, statuses: [401] },
+  { category: 'billing', patterns: BILLING_PATTERNS, statuses: [402] },
+  { category: 'rate_limit', patterns: RATE_LIMIT_PATTERNS, statuses: [429] },
+  { category: 'overloaded', patterns: OVERLOADED_PATTERNS, statuses: [503, 529] },
+  { category: 'server_error', patterns: SERVER_ERROR_PATTERNS, statuses: [500, 502] },
+  { category: 'timeout', patterns: TIMEOUT_PATTERNS, statuses: [504] },
+  { category: 'network', patterns: NETWORK_PATTERNS, codes: NETWORK_CODES },
+  { category: 'bad_request', patterns: BAD_REQUEST_PATTERNS, statuses: [400] },
+];
+
+/**
+ * Map any thrown value to a category, flag set and user-safe text.
+ *
+ * A LoopAbortedError is always 'loop_aborted'. Otherwise the value's
+ * message (lower-cased), `code` and `status` are tested against
+ * CLASSIFICATION_RULES in order and the first hit decides. When nothing
+ * matches the result is 'unknown'.
+ *
+ * @param err The thrown value.
+ * @returns The classification, with `err` attached as `cause`.
+ */
+export function classifyError(err: unknown): ClassifiedError {
+  if (err instanceof LoopAbortedError) {
+    return build('loop_aborted', err);
+  }
+
+  const { message, code, status } = extractErrorShape(err);
+  const lower = message.toLowerCase();
+
+  const hit = CLASSIFICATION_RULES.find((rule) => {
+    if (matchesAny(lower, rule.patterns)) return true;
+    if (rule.statuses !== undefined && status !== undefined && rule.statuses.includes(status)) {
+      return true;
+    }
+    return rule.codes !== undefined && code !== undefined && rule.codes.has(code);
+  });
+
+  return build(hit ? hit.category : 'unknown', err);
+}
+
+/* ----------------------------- Re-exports ----------------------------- */
+
+export { USER_MESSAGES };
diff --git a/packages/api/src/engine/reasoning-loop.ts b/packages/api/src/engine/reasoning-loop.ts
index 45c5861..914bfb6 100644
--- a/packages/api/src/engine/reasoning-loop.ts
+++ b/packages/api/src/engine/reasoning-loop.ts
@@ -4,6 +4,11 @@ import type { ChatMessage, ChatOptions, LLMProvider, LLMResponse, LLMUsage } fro
import type { ToolRegistry } from './tool-registry.js';
import type { LoopResult, ReasoningLoopConfig } from './reasoning-loop.types.js';
import type { BudgetTracker } from './budget-tracker.js';
+import { runWithRecovery } from './recovery-loop.js';
+import { classifyError, LoopAbortedError } from './error-classifier.js';
+import { ToolLoopGuard } from './tool-loop-guard.js';
+import { wireRecoveryMetrics, toolLoopAbortedTotal } from './recovery-metrics.js';
+import { CompressorService } from './compressor.js';
const logger = createLogger('engine:reasoning-loop');
@@ -50,20 +55,19 @@ function formatArgs(args: Readonly>): string {
* Stops when: model produces no tool calls, error finish reason, or max iterations reached.
*/
export class ReasoningLoop {
- private readonly provider: LLMProvider;
- private readonly toolRegistry: ToolRegistry;
-
- constructor(provider: LLMProvider, toolRegistry: ToolRegistry) {
- this.provider = provider;
- this.toolRegistry = toolRegistry;
- }
+ constructor(
+ private readonly provider: LLMProvider,
+ private readonly toolRegistry: ToolRegistry,
+ private readonly compressor: CompressorService,
+ private readonly providerInfo: { provider: string; model: string },
+ ) {}
async run(
initialMessages: readonly ChatMessage[],
config?: ReasoningLoopConfig,
): Promise {
const maxIterations = config?.maxIterations ?? DEFAULT_MAX_ITERATIONS;
- const messages: ChatMessage[] = [...initialMessages];
+ let messages: ChatMessage[] = [...initialMessages];
let totalUsage: LLMUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
let iterations = 0;
let lastResponse: LLMResponse | null = null;
@@ -120,6 +124,8 @@ export class ReasoningLoop {
abortSignal: abortController.signal,
};
+ const toolLoopGuard = new ToolLoopGuard();
+
try {
while (iterations < maxIterations) {
if (abortController.signal.aborted) {
@@ -161,7 +167,15 @@ export class ReasoningLoop {
let response: LLMResponse;
try {
- response = await this.provider.chat(messages, callOptions);
+ const recoveryResult = await runWithRecovery(this.provider, messages, callOptions, {
+ classifier: classifyError,
+ compressor: (msgs) => this.compressor.compress(msgs, this.providerInfo),
+ onRecoveryEvent: wireRecoveryMetrics,
+ provider: this.providerInfo.provider,
+ });
+ response = recoveryResult.response;
+ // Adopt possibly-compressed messages for subsequent iterations.
+ messages = [...recoveryResult.messages];
} catch (err: unknown) {
// If abort fired while the SDK call was in flight, the provider
// throws an AbortError. Treat that as a clean timeout exit rather
@@ -259,6 +273,14 @@ export class ReasoningLoop {
}
const result = await this.toolRegistry.execute(toolCall.name, toolCall.arguments);
+ try {
+ toolLoopGuard.record(toolCall.name, toolCall.arguments, result.isError);
+ } catch (loopErr) {
+ if (loopErr instanceof LoopAbortedError) {
+ toolLoopAbortedTotal.inc({ tool_name: loopErr.toolName });
+ }
+ throw loopErr;
+ }
messages.push({
role: 'tool',
diff --git a/packages/api/src/engine/recovery-loop.ts b/packages/api/src/engine/recovery-loop.ts
new file mode 100644
index 0000000..9f03b67
--- /dev/null
+++ b/packages/api/src/engine/recovery-loop.ts
@@ -0,0 +1,200 @@
+/**
+ * Classifier-driven recovery layer for the agent runner. Wraps a single
+ * provider.chat() call. On failure, classifies the error and dispatches
+ * one of: retry (with backoff), compress (transform messages and retry),
+ * or surface (no applicable action / budget exhausted).
+ *
+ * Replaces ResilientLLMProvider. See spec §4.
+ */
+
+import { createLogger } from '@clawix/shared';
+import type { ChatMessage, ChatOptions, LLMProvider, LLMResponse } from '@clawix/shared';
+
+import {
+ DEFAULT_RECOVERY_CONFIG,
+ type RecoveryConfig,
+ type RecoveryDeps,
+ type RecoveryEvent,
+} from './recovery-loop.types.js';
+
+const defaultLogger = createLogger('engine:recovery-loop');
+
+/** What runWithRecovery resolves with after a successful provider call. */
+interface RecoveryResult {
+ /** The response returned by the successful provider.chat() call. */
+ readonly response: LLMResponse;
+ /** The messages that call was made with; differs from the input only after a compress action. */
+ readonly messages: readonly ChatMessage[];
+}
+
+/**
+ * Wait for `baseMs` plus up to 50% random extra, unless aborted first.
+ *
+ * @param baseMs Base delay in milliseconds; values <= 0 return immediately
+ *   without consulting the abort signal.
+ * @param abortSignal Optional signal. If it is already aborted, or aborts
+ *   while waiting, the promise rejects with `Error('aborted')` and the
+ *   pending timer is cleared.
+ * @returns A promise that resolves once the jittered delay has elapsed.
+ */
+async function delayWithJitter(baseMs: number, abortSignal?: AbortSignal): Promise<void> {
+  if (baseMs <= 0) return;
+  const jittered = baseMs + Math.random() * baseMs * 0.5;
+  return new Promise<void>((resolve, reject) => {
+    if (abortSignal?.aborted) {
+      reject(new Error('aborted'));
+      return;
+    }
+    const timer = setTimeout(() => {
+      // The delay completed normally: detach the abort handler.
+      abortSignal?.removeEventListener('abort', onAbort);
+      resolve();
+    }, jittered);
+    const onAbort = (): void => {
+      clearTimeout(timer);
+      reject(new Error('aborted'));
+    };
+    abortSignal?.addEventListener('abort', onAbort, { once: true });
+  });
+}
+
+/**
+ * Run provider.chat() with classifier-driven recovery. The returned
+ * `messages` array is the (possibly compressed) sequence used for the
+ * successful call — the reasoning loop reassigns its outer `messages`
+ * variable so subsequent iterations use it.
+ *
+ * On each failure the error is classified and exactly one of these happens:
+ *   - retry after a jittered backoff (flag `retryable`, retry budget left);
+ *   - compress the messages and call again (flag `compressible`, compress
+ *     budget left);
+ *   - rethrow the provider error (no applicable action, budget exhausted,
+ *     global cap exceeded, compressor failure, or abort).
+ * The error that is rethrown is always the provider's error, never a
+ * compressor or backoff error.
+ *
+ * NOTE on streaming (spec §6): in v1 the LLMProvider contract returns the
+ * full response from provider.chat() — there is no token-level streaming
+ * for the recovery loop to short-circuit against. The reasoning-loop's
+ * onEvent emits AFTER provider.chat() returns. The mid-stream guard in
+ * spec §6 is preserved as forward-looking design intent; when token-level
+ * streaming lands in the provider contract, add streamStarted detection
+ * here at that time. For now: every error is pre-stream.
+ *
+ * @param provider Provider whose chat() call is wrapped.
+ * @param messages Messages for the first attempt.
+ * @param options Chat options; `abortSignal` is honoured between attempts
+ *   and during backoff.
+ * @param deps Classifier, compressor and optional event sink / logger.
+ * @param config Budgets and backoff schedule.
+ * @returns The successful response and the messages it was obtained with.
+ * @throws The provider error of the attempt after which recovery stopped.
+ */
+export async function runWithRecovery(
+  provider: LLMProvider,
+  messages: readonly ChatMessage[],
+  options: ChatOptions,
+  deps: RecoveryDeps,
+  config: RecoveryConfig = DEFAULT_RECOVERY_CONFIG,
+): Promise<RecoveryResult> {
+  const logger = deps.logger ?? defaultLogger;
+
+  let currentMessages: readonly ChatMessage[] = messages;
+  let retryCount = 0;
+  let compressCount = 0;
+  // Counts failed attempts so far; reported as `attempt` on every event.
+  let totalActions = 0;
+
+  let lastAction: 'retry' | 'compress' | undefined;
+  let lastCategory: RecoveryEvent['category'] | undefined;
+
+  for (;;) {
+    try {
+      const response = await provider.chat(currentMessages, options);
+      if (totalActions > 0) {
+        // Only report success when at least one recovery step preceded it.
+        emit(deps, {
+          type: 'recovery_succeeded',
+          category: lastCategory ?? 'unknown',
+          attempt: totalActions,
+          action: lastAction,
+          provider: deps.provider,
+        });
+      }
+      return { response, messages: currentMessages };
+    } catch (err) {
+      // Cancellation: propagate immediately, no recovery.
+      if (options.abortSignal?.aborted) throw err;
+
+      const classified = deps.classifier(err);
+      totalActions += 1;
+      lastCategory = classified.category;
+
+      // Global safety cap: at most `globalCap` recovery actions per run. The
+      // check fires AFTER incrementing totalActions, so totalActions==globalCap+1
+      // triggers the cap. With default globalCap=5 this allows at most 5 actions
+      // (one of each: 3 retries + 1 compress = 4 in normal usage; cap fires only
+      // in unexpected loops).
+      if (totalActions > config.globalCap) {
+        emit(deps, {
+          type: 'recovery_exhausted',
+          category: classified.category,
+          attempt: totalActions,
+          reason: 'global_cap',
+          provider: deps.provider,
+        });
+        logger.warn(
+          { category: classified.category, totalActions },
+          'recovery_exhausted: global_cap',
+        );
+        throw err;
+      }
+
+      // Retry path.
+      if (classified.flags.retryable && retryCount < config.maxRetries) {
+        // A missing backoff entry means "retry without waiting".
+        const delayMs = config.retryBackoffMs[retryCount] ?? 0;
+        retryCount += 1;
+        lastAction = 'retry';
+        emit(deps, {
+          type: 'recovery_action',
+          action: 'retry',
+          category: classified.category,
+          attempt: totalActions,
+          delayMs,
+          provider: deps.provider,
+        });
+        logger.warn(
+          { category: classified.category, attempt: totalActions, delayMs },
+          'recovery_action: retry',
+        );
+        try {
+          await delayWithJitter(delayMs, options.abortSignal);
+        } catch {
+          // The backoff only rejects on abort. Either way, surface the provider
+          // error that triggered this retry rather than the backoff's own error.
+          throw err;
+        }
+        continue;
+      }
+
+      // Compress path.
+      if (classified.flags.compressible && compressCount < config.maxCompressions) {
+        let compressed: readonly ChatMessage[];
+        try {
+          compressed = await deps.compressor(currentMessages);
+        } catch (compressErr) {
+          emit(deps, {
+            type: 'recovery_exhausted',
+            category: classified.category,
+            attempt: totalActions,
+            reason: 'compress_failed',
+            provider: deps.provider,
+          });
+          logger.warn(
+            { compressErr, category: classified.category },
+            'recovery_exhausted: compress_failed',
+          );
+          throw err; // surface ORIGINAL provider error
+        }
+        currentMessages = compressed;
+        compressCount += 1;
+        lastAction = 'compress';
+        emit(deps, {
+          type: 'recovery_action',
+          action: 'compress',
+          category: classified.category,
+          attempt: totalActions,
+          provider: deps.provider,
+        });
+        logger.info(
+          { category: classified.category, attempt: totalActions },
+          'recovery_action: compress',
+        );
+        continue;
+      }
+
+      // No applicable recovery action with budget left.
+      emit(deps, {
+        type: 'recovery_exhausted',
+        category: classified.category,
+        attempt: totalActions,
+        reason: 'no_action',
+        provider: deps.provider,
+      });
+      logger.warn({ category: classified.category }, 'recovery_exhausted: no_action');
+      throw err;
+    }
+  }
+}
+
+/** Forward `event` to the optional `onRecoveryEvent` sink; a no-op when none is configured. */
+function emit(deps: RecoveryDeps, event: RecoveryEvent): void {
+  const sink = deps.onRecoveryEvent;
+  if (!sink) return;
+  sink(event);
+}
diff --git a/packages/api/src/engine/recovery-loop.types.ts b/packages/api/src/engine/recovery-loop.types.ts
new file mode 100644
index 0000000..3d34b78
--- /dev/null
+++ b/packages/api/src/engine/recovery-loop.types.ts
@@ -0,0 +1,92 @@
+/**
+ * Type definitions for the classifier-driven recovery layer in the agent
+ * runner. The classifier categorizes a thrown error and emits flags that
+ * the recovery loop dispatches on. Two flags (rotatable, fallbackable) are
+ * populated correctly today but consumed by deferred follow-up specs.
+ */
+
+import type { Logger } from 'pino';
+import type { ChatMessage } from '@clawix/shared';
+
+/* -------------------------- Categories & flags -------------------------- */
+
+/** Every category the classifier can assign to a thrown value. */
+export type ErrorCategory =
+ // Provider transient (retryable)
+ | 'network'
+ | 'timeout'
+ | 'overloaded'
+ | 'server_error'
+ | 'rate_limit'
+ // Provider permanent (recovery deferred)
+ | 'auth'
+ | 'billing'
+ | 'model_not_found'
+ | 'provider_policy'
+ // Context limit (recoverable: flagged compressible, handled by the compress path)
+ | 'context_overflow'
+ // Provider permanent (no recovery in v1)
+ | 'payload_too_large'
+ | 'bad_request'
+ // Non-provider
+ | 'policy'
+ | 'loop_aborted'
+ | 'unknown';
+
+/** Which recovery actions are applicable to a classified error. */
+export interface RecoveryFlags {
+ /** The same call may be attempted again after a backoff. */
+ readonly retryable: boolean;
+ /** The call may be attempted again with compressed messages. */
+ readonly compressible: boolean;
+ /** Populated today; consumed by a deferred follow-up spec. */
+ readonly rotatable: boolean;
+ /** Populated today; consumed by a deferred follow-up spec. */
+ readonly fallbackable: boolean;
+}
+
+/** Result of classifying a thrown value. */
+export interface ClassifiedError {
+ /** The category assigned by the classifier. */
+ readonly category: ErrorCategory;
+ /** User-safe display text. Never contains stack traces or provider internals. */
+ readonly text: string;
+ /** Recovery actions applicable to this category. */
+ readonly flags: RecoveryFlags;
+ /** Original error reference, for logging. Never sent to user. */
+ readonly cause: unknown;
+}
+
+/* ----------------------------- Recovery loop ---------------------------- */
+
+/** Budgets and backoff schedule for one recovery-wrapped provider call. */
+export interface RecoveryConfig {
+ /** Maximum number of retry actions. */
+ readonly maxRetries: number;
+ /** Base backoff in ms, indexed by retry number (0-based). */
+ readonly retryBackoffMs: readonly number[];
+ /** Maximum number of compress actions. */
+ readonly maxCompressions: number;
+ /** Upper bound on failed attempts handled before giving up regardless of flags. */
+ readonly globalCap: number;
+}
+
+/** Defaults: up to 3 retries with 500/1000/2000 ms base backoff, 1 compression, global cap of 5. */
+export const DEFAULT_RECOVERY_CONFIG: RecoveryConfig = {
+ maxRetries: 3,
+ retryBackoffMs: [500, 1000, 2000],
+ maxCompressions: 1,
+ globalCap: 5,
+};
+
+/** Kinds of event a recovery run can report. */
+export type RecoveryEventType = 'recovery_action' | 'recovery_exhausted' | 'recovery_succeeded';
+
+/** Recovery actions a run can take. */
+export type RecoveryAction = 'retry' | 'compress';
+
+/** Why a recovery run gave up and surfaced the error. */
+export type RecoveryExhaustedReason = 'global_cap' | 'no_action' | 'compress_failed';
+
+/** Event describing one step or the outcome of a recovery run. */
+export interface RecoveryEvent {
+ readonly type: RecoveryEventType;
+ /** Category of the most recently classified error. */
+ readonly category: ErrorCategory;
+ /** Number of failed attempts seen so far in this run (1-based). */
+ readonly attempt: number;
+ /** Present when type === 'recovery_action' or 'recovery_succeeded'. */
+ readonly action?: RecoveryAction;
+ /** Present when type === 'recovery_action' && action === 'retry'. */
+ readonly delayMs?: number;
+ /** Present when type === 'recovery_exhausted'. */
+ readonly reason?: RecoveryExhaustedReason;
+ /** Provider name; included for metric labels. */
+ readonly provider?: string;
+}
+
+/** Collaborators injected into the recovery loop. */
+export interface RecoveryDeps {
+  /** Maps a thrown value to a category and recovery flags. */
+  readonly classifier: (err: unknown) => ClassifiedError;
+  /** Produces a replacement message list for the compress action. */
+  readonly compressor: (messages: readonly ChatMessage[]) => Promise<readonly ChatMessage[]>;
+  /** Optional sink for recovery events (e.g. metrics). */
+  readonly onRecoveryEvent?: (e: RecoveryEvent) => void;
+  /** Optional logger; the loop falls back to its own when omitted. */
+  readonly logger?: Logger;
+  /** Provider name copied onto every emitted event. */
+  readonly provider?: string;
+}
diff --git a/packages/api/src/engine/recovery-metrics.ts b/packages/api/src/engine/recovery-metrics.ts
new file mode 100644
index 0000000..37afa5e
--- /dev/null
+++ b/packages/api/src/engine/recovery-metrics.ts
@@ -0,0 +1,64 @@
+/**
+ * Prometheus counters for the recovery layer. Wired into the recovery
+ * loop via the `onRecoveryEvent` dependency so the loop itself stays
+ * pure (testable without a metrics registry).
+ */
+
+import { Counter } from 'prom-client';
+import type { RecoveryEvent } from './recovery-loop.types.js';
+
+export const agentErrorTotal = new Counter({ // no `registers` option is passed, so prom-client attaches this to its default global registry
+ name: 'clawix_agent_error_total',
+ help: 'Agent run errors by classifier category',
+ labelNames: ['category', 'provider'], // NOTE(review): not incremented by wireRecoveryMetrics in this file — presumably done by the caller; confirm
+});
+
+export const recoveryActionTotal = new Counter({ // incremented by wireRecoveryMetrics on 'recovery_action' events
+ name: 'clawix_recovery_action_total',
+ help: 'Recovery actions taken by the runner (retry, compress)',
+ labelNames: ['action', 'category', 'provider'],
+});
+
+export const recoveryOutcomeTotal = new Counter({ // incremented by wireRecoveryMetrics on 'recovery_succeeded' and 'recovery_exhausted' events
+ name: 'clawix_recovery_outcome_total',
+ help: 'Final outcome of a recovery loop',
+ labelNames: ['outcome', 'category', 'provider'], // outcome is 'succeeded' or 'exhausted_<reason>'
+});
+
+export const toolLoopAbortedTotal = new Counter({ // NOTE(review): not incremented anywhere in this file — presumably by the ToolLoopGuard caller; confirm
+ name: 'clawix_tool_loop_aborted_total',
+ help: 'Tool-loop guard aborts (3 consecutive identical failures)',
+ labelNames: ['tool_name'],
+});
+
+const UNKNOWN_PROVIDER = 'unknown'; // label value used when the event carries no provider
+
+/**
+ * Translate a RecoveryEvent into Prometheus counter increments. Pass this
+ * function as `onRecoveryEvent` when calling `runWithRecovery`. Only recoveryActionTotal and recoveryOutcomeTotal are touched.
+ */
+export function wireRecoveryMetrics(event: RecoveryEvent): void {
+ const provider = event.provider ?? UNKNOWN_PROVIDER;
+ switch (event.type) {
+ case 'recovery_action':
+ if (event.action) { // `action` is optional on RecoveryEvent; an action event without one is silently not counted
+ recoveryActionTotal.inc({ action: event.action, category: event.category, provider });
+ }
+ return;
+ case 'recovery_succeeded':
+ recoveryOutcomeTotal.inc({ outcome: 'succeeded', category: event.category, provider });
+ return;
+ case 'recovery_exhausted':
+ recoveryOutcomeTotal.inc({
+ outcome: `exhausted_${event.reason ?? 'unknown'}`, // e.g. exhausted_global_cap; exhausted_unknown when no reason is set
+ category: event.category,
+ provider,
+ });
+ return;
+ default: { // unreachable for the current union; kept as a compile-time exhaustiveness check
+ const _exhaustive: never = event.type; // stops type-checking if RecoveryEventType gains a member with no case above
+ void _exhaustive; // marks the binding as intentionally unused
+ return;
+ }
+ }
+}
diff --git a/packages/api/src/engine/resilience.ts b/packages/api/src/engine/resilience.ts
deleted file mode 100644
index 45d937e..0000000
--- a/packages/api/src/engine/resilience.ts
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * ResilientLLMProvider — wraps any LLMProvider with retry + jitter backoff
- * for transient errors (rate limits, server errors, network timeouts).
- */
-
-import type { ChatMessage, ChatOptions, LLMProvider, LLMResponse } from '@clawix/shared';
-import { createLogger } from '@clawix/shared';
-
-const logger = createLogger('engine:resilience');
-
-/* ------------------------------------------------------------------ */
-/* Types */
-/* ------------------------------------------------------------------ */
-
-/** Configuration for retry behaviour. */
-export interface RetryConfig {
- /** Number of retries after the initial attempt. */
- readonly maxRetries: number;
- /**
- * Base delay in milliseconds for each retry attempt.
- * Array length must equal maxRetries. Set entries to 0 in tests.
- */
- readonly backoffMs: readonly number[];
- /** Substrings (case-insensitive) that identify a transient error. */
- readonly transientPatterns: readonly string[];
-}
-
-/* ------------------------------------------------------------------ */
-/* Defaults */
-/* ------------------------------------------------------------------ */
-
-/** Default retry configuration shipped with Clawix. */
-export const DEFAULT_RETRY_CONFIG: RetryConfig = {
- maxRetries: 3,
- backoffMs: [500, 1000, 2000],
- transientPatterns: [
- 'status 429',
- 'rate limit',
- 'rate_limit',
- 'status 500',
- 'status 502',
- 'status 503',
- 'status 504',
- 'overloaded',
- 'timeout',
- 'ETIMEDOUT',
- 'ECONNRESET',
- 'connection',
- 'server error',
- 'internal server error',
- ],
-};
-
-/* ------------------------------------------------------------------ */
-/* Helpers */
-/* ------------------------------------------------------------------ */
-
-/**
- * Returns `true` when the error message matches a known transient pattern.
- * Matching is case-insensitive substring search.
- *
- * @param message - The error message to test.
- * @param patterns - Override pattern list (defaults to DEFAULT_RETRY_CONFIG.transientPatterns).
- */
-export function isTransientError(
- message: string,
- patterns: readonly string[] = DEFAULT_RETRY_CONFIG.transientPatterns,
-): boolean {
- const lower = message.toLowerCase();
- return patterns.some((pattern) => lower.includes(pattern.toLowerCase()));
-}
-
-/** Sleeps for `ms` milliseconds, adding up to 50 % random jitter. */
-async function delayWithJitter(baseMs: number): Promise {
- if (baseMs <= 0) return;
- const jittered = baseMs + Math.random() * baseMs * 0.5;
- await new Promise((resolve) => setTimeout(resolve, jittered));
-}
-
-/* ------------------------------------------------------------------ */
-/* ResilientLLMProvider */
-/* ------------------------------------------------------------------ */
-
-/**
- * Decorates an `LLMProvider` with retry logic.
- *
- * On every transient failure the provider waits `backoffMs[attempt]`
- * (plus ≤50 % random jitter) before retrying. Non-transient errors
- * are re-thrown immediately without consuming any retry budget.
- */
-export class ResilientLLMProvider implements LLMProvider {
- private readonly inner: LLMProvider;
- private readonly config: RetryConfig;
-
- constructor(inner: LLMProvider, config?: Partial) {
- this.inner = inner;
- this.config = {
- ...DEFAULT_RETRY_CONFIG,
- ...config,
- // Merge arrays explicitly so partial overrides work correctly.
- backoffMs: config?.backoffMs ?? DEFAULT_RETRY_CONFIG.backoffMs,
- transientPatterns: config?.transientPatterns ?? DEFAULT_RETRY_CONFIG.transientPatterns,
- };
- }
-
- /** Delegates to the inner provider's name. */
- get name(): string {
- return this.inner.name;
- }
-
- /**
- * Sends a chat request, retrying on transient errors up to `maxRetries` times.
- */
- async chat(messages: readonly ChatMessage[], options?: ChatOptions): Promise {
- let lastError: unknown;
-
- for (let attempt = 0; attempt <= this.config.maxRetries; attempt += 1) {
- try {
- return await this.inner.chat(messages, options);
- } catch (err: unknown) {
- const message = err instanceof Error ? err.message : String(err);
-
- // If the caller's abort signal has fired, the error is intentional
- // cancellation — propagate immediately without retrying. Otherwise
- // a timeout/cancel would just trigger N retries before bubbling up.
- if (options?.abortSignal?.aborted) {
- throw err;
- }
-
- if (!isTransientError(message, this.config.transientPatterns)) {
- // Not a transient error — propagate immediately.
- throw err;
- }
-
- lastError = err;
- const retriesLeft = this.config.maxRetries - attempt;
-
- if (retriesLeft === 0) {
- break;
- }
-
- const baseMs = this.config.backoffMs[attempt] ?? 0;
- logger.warn(
- {
- attempt,
- retriesLeft,
- errorMessage: message,
- delayMs: baseMs,
- },
- 'Transient LLM error — retrying',
- );
-
- await delayWithJitter(baseMs);
- }
- }
-
- throw lastError;
- }
-}
diff --git a/packages/api/src/engine/tool-loop-guard.ts b/packages/api/src/engine/tool-loop-guard.ts
new file mode 100644
index 0000000..249b112
--- /dev/null
+++ b/packages/api/src/engine/tool-loop-guard.ts
@@ -0,0 +1,60 @@
+/**
+ * ToolLoopGuard — detects pathological tool-call loops where the agent
+ * repeatedly invokes the same tool with identical args after each call
+ * fails. After 3 consecutive identical failing calls, throws
+ * LoopAbortedError to terminate the run cleanly.
+ *
+ * Resets on any non-matching call (different tool, different args, or
+ * a successful call with the same args).
+ */
+
+import { LoopAbortedError } from './error-classifier.js';
+
+const TOOL_LOOP_THRESHOLD = 3;
+
+/**
+ * Stable JSON serialization with sorted keys so {a:1,b:2} and {b:2,a:1} produce the same canonical string.
+ * Arrays keep their element order; objects are emitted with their own enumerable keys sorted; all other values go through JSON.stringify.
+ */
+function stableStringify(value: unknown): string {
+ if (value === null || typeof value !== 'object') { // primitives (and null) need no key ordering
+ return JSON.stringify(value); // note: yields undefined at runtime for undefined/function inputs
+ }
+ if (Array.isArray(value)) {
+ return `[${value.map(stableStringify).join(',')}]`;
+ }
+ const obj = value as Record<string, unknown>; // Record needs both type arguments; values stay unknown and are narrowed by the recursive call
+ const keys = Object.keys(obj).sort();
+ return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(',')}}`;
+}
+
+function canonicalizeArgs(toolName: string, args: unknown): string { // builds the comparison key for one tool call
+ return `${toolName}:${stableStringify(args)}`; // "<toolName>:<key-sorted JSON of args>", so equal calls compare equal as strings
+}
+
+export class ToolLoopGuard {
+ private lastFailingHash: string | null = null; // canonical key of the most recent failing call; null initially and after any success
+ private consecutiveFailures = 0; // length of the current run of identical failing calls
+
+ /**
+ * Record a tool execution result. Throws LoopAbortedError on the 3rd
+ * consecutive failing call with identical (toolName, args).
+ */
+ record(toolName: string, args: unknown, isError: boolean): void {
+ const hash = canonicalizeArgs(toolName, args);
+ if (isError && hash === this.lastFailingHash) { // same failing call as the previous one: extend the run
+ this.consecutiveFailures += 1;
+ if (this.consecutiveFailures >= TOOL_LOOP_THRESHOLD) {
+ throw new LoopAbortedError(toolName, args); // counters are not reset here, so the same failing call would throw again
+ }
+ return;
+ }
+ if (isError) { // a different failing call starts a new run at 1
+ this.lastFailingHash = hash;
+ this.consecutiveFailures = 1;
+ } else { // any successful call clears the run, whatever its tool or args
+ this.lastFailingHash = null;
+ this.consecutiveFailures = 0;
+ }
+ }
+}
diff --git a/packages/api/src/engine/tool.ts b/packages/api/src/engine/tool.ts
index d20aea3..b037d7f 100644
--- a/packages/api/src/engine/tool.ts
+++ b/packages/api/src/engine/tool.ts
@@ -3,6 +3,12 @@ import type { ToolDefinition } from '@clawix/shared';
/** Result returned by a tool execution. */
export interface ToolResult {
readonly output: string;
+
+ /**
+ * True when the tool failed to produce a normal result and the `output`
+ * contains an error description. Used by ToolLoopGuard to detect
+ * pathological retry-the-same-broken-call patterns.
+ */
readonly isError: boolean;
}
diff --git a/packages/api/src/main.ts b/packages/api/src/main.ts
index 242d7ab..84f59c1 100644
--- a/packages/api/src/main.ts
+++ b/packages/api/src/main.ts
@@ -32,6 +32,11 @@ async function bootstrap() {
AppModule,
new FastifyAdapter({
logger: false,
+ // Honor X-Forwarded-Proto / X-Forwarded-For when behind a TLS-terminating
+ // reverse proxy (Caddy, Traefik, nginx, Tailscale Funnel). request.protocol
+ // then reflects the original client scheme, which auth.controller uses to
+ // decide whether to mark the refresh cookie Secure.
+ trustProxy: true,
}),
{
logger: {
diff --git a/packages/shared/src/providers/__tests__/install-parity.test.ts b/packages/shared/src/providers/__tests__/install-parity.test.ts
new file mode 100644
index 0000000..76bc671
--- /dev/null
+++ b/packages/shared/src/providers/__tests__/install-parity.test.ts
@@ -0,0 +1,30 @@
+import { readFileSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { describe, expect, it } from 'vitest';
+
+import { listProviders } from '../provider-registry.js';
+
+// scripts/install.mjs hand-mirrors the provider registry into its
+// numbered prompt catalog. This test guards against drift: every
+// non-custom provider in the registry must appear in the installer
+// source by both `name` and `envKey`. Custom is handled by the
+// "Custom" branch, so it is excluded.
+const HERE = dirname(fileURLToPath(import.meta.url)); // directory containing this test file
+const REPO_ROOT = resolve(HERE, '../../../../..'); // five levels up from packages/shared/src/providers/__tests__
+const INSTALLER_PATH = resolve(REPO_ROOT, 'scripts/install.mjs');
+
+describe('installer / provider-registry parity', () => {
+ const installerSource = readFileSync(INSTALLER_PATH, 'utf8'); // read once in the describe body, not inside each test
+ const builtIns = listProviders().filter((p) => p.name !== 'custom'); // 'custom' is skipped: the installer handles it in its own branch
+
+ for (const provider of builtIns) { // registers two tests per built-in provider
+ it(`installer references ${provider.name} by id`, () => {
+ expect(installerSource).toContain(`id: '${provider.name}'`); // literal substring match: sensitive to the installer's quoting and spacing
+ });
+
+ it(`installer references ${provider.name} envKey`, () => {
+ expect(installerSource).toContain(`envKey: '${provider.envKey}'`); // same literal-match caveat as the id check
+ });
+ }
+});
diff --git a/packages/shared/src/schemas/agent.schema.ts b/packages/shared/src/schemas/agent.schema.ts
index 405741d..f4fd981 100644
--- a/packages/shared/src/schemas/agent.schema.ts
+++ b/packages/shared/src/schemas/agent.schema.ts
@@ -36,6 +36,12 @@ export const createAgentDefinitionSchema = z.object({
* separate messages. Off by default for backward compatibility.
*/
streamingEnabled: z.boolean().default(false),
+ /**
+ * When true, the agent is a Public (official) agent visible to all users.
+ * The server enforces that only admins may set this to true; for all other
+ * callers the value is forced to false in the service layer.
+ */
+ isOfficial: z.boolean().default(false),
});
export const updateAgentDefinitionSchema = createAgentDefinitionSchema
diff --git a/packages/shared/src/schemas/auth.schema.ts b/packages/shared/src/schemas/auth.schema.ts
index f221aee..33e3d91 100644
--- a/packages/shared/src/schemas/auth.schema.ts
+++ b/packages/shared/src/schemas/auth.schema.ts
@@ -5,8 +5,10 @@ export const loginSchema = z.object({
password: z.string().min(1, 'Password is required').max(128, 'Password too long'),
});
+// refreshToken is optional in the body because it can also be supplied via
+// the httpOnly clawix_refresh cookie. Controllers prefer cookie over body.
export const refreshSchema = z.object({
- refreshToken: z.string().min(1, 'Refresh token is required'),
+ refreshToken: z.string().min(1, 'Refresh token is required').optional(),
});
export type LoginInput = z.infer;
diff --git a/packages/shared/src/schemas/user.schema.ts b/packages/shared/src/schemas/user.schema.ts
index 480f979..f881ef8 100644
--- a/packages/shared/src/schemas/user.schema.ts
+++ b/packages/shared/src/schemas/user.schema.ts
@@ -14,6 +14,7 @@ export const updateUserSchema = z.object({
name: z.string().min(1).max(255).optional(),
role: userRoleSchema.optional(),
isActive: z.boolean().optional(),
+ policyId: z.string().cuid().optional(),
});
export type CreateUserInput = z.infer;
diff --git a/packages/web/src/app/(dashboard)/workspace/upload-zone.tsx b/packages/web/src/app/(dashboard)/workspace/upload-zone.tsx
index 464b5a9..9c241a9 100644
--- a/packages/web/src/app/(dashboard)/workspace/upload-zone.tsx
+++ b/packages/web/src/app/(dashboard)/workspace/upload-zone.tsx
@@ -98,12 +98,15 @@ export function UploadZone({ currentPath, onUploadComplete, onClose }: UploadZon
return;
}
+ // @fastify/multipart's req.file() only exposes fields that were
+ // streamed BEFORE the file part. Append text fields first so the
+ // controller can read req.file().fields['path'] / ['relativePath'].
const formData = new FormData();
- formData.append('file', file);
formData.append('path', currentPath);
if (relativePath) {
formData.append('relativePath', relativePath);
}
+ formData.append('file', file);
await new Promise((resolve) => {
const xhr = new XMLHttpRequest();
diff --git a/packages/web/src/app/login/page.tsx b/packages/web/src/app/login/page.tsx
index c2af9df..fb686ee 100644
--- a/packages/web/src/app/login/page.tsx
+++ b/packages/web/src/app/login/page.tsx
@@ -1,6 +1,6 @@
'use client';
-import { Suspense, useState } from 'react';
+import { Suspense, useEffect, useState } from 'react';
import { useRouter, useSearchParams } from 'next/navigation';
import Image from 'next/image';
import { Eye, EyeOff, GalleryVerticalEnd, Loader2 } from 'lucide-react';
@@ -8,6 +8,7 @@ import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Label } from '@/components/ui/label';
import { useAuth } from '@/components/auth-provider';
+import { ApiError } from '@/lib/api';
export default function LoginPage() {
return (
@@ -26,9 +27,23 @@ function LoginForm() {
const [error, setError] = useState('');
const [isLoading, setIsLoading] = useState(false);
const [showPassword, setShowPassword] = useState(false);
+ const [waitTime, setWaitTime] = useState(0);
+
+ // Countdown timer when the API returns 429 from progressive delay or
+ // the per-IP throttler. Re-enables the form when it hits 0.
+ useEffect(() => {
+ if (waitTime <= 0) return;
+ const timer = setInterval(() => {
+ setWaitTime((prev) => Math.max(0, prev - 1));
+ }, 1000);
+ return () => {
+ clearInterval(timer);
+ };
+ }, [waitTime]);
async function handleSubmit(e: React.SyntheticEvent) {
e.preventDefault();
+ if (waitTime > 0) return;
setError('');
setIsLoading(true);
@@ -39,12 +54,23 @@ function LoginForm() {
const target = redirect?.startsWith('/') ? redirect : '/conversations';
router.push(target);
} catch (err) {
- setError(err instanceof Error ? err.message : 'Login failed');
+ if (err instanceof ApiError && err.status === 429) {
+ // Server messages: "Too many attempts. Try again in 8s" (per-email)
+ // or "ThrottlerException: Too Many Requests" (per-IP, no Xs).
+ const match = /(\d+)s/.exec(err.message);
+ const seconds = match ? Math.min(Number(match[1]), 60) : 30;
+ setWaitTime(seconds);
+ setError(err.message);
+ } else {
+ setError(err instanceof Error ? err.message : 'Login failed');
+ }
} finally {
setIsLoading(false);
}
}
+ const isDisabled = isLoading || waitTime > 0;
+
return (
{/* Left panel */}
@@ -88,7 +114,7 @@ function LoginForm() {
setEmail(e.target.value);
}}
required
- disabled={isLoading}
+ disabled={isDisabled}
/>
@@ -104,7 +130,7 @@ function LoginForm() {
setPassword(e.target.value);
}}
required
- disabled={isLoading}
+ disabled={isDisabled}
className="pr-10"
/>
}
-