diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..9b80273 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,35 @@ +# Version control +.git/ +.gitignore + +# Python artifacts +.venv/ +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ +*.egg-info/ +dist/ +build/ +.mypy_cache/ +.ruff_cache/ + +# Tests and fixtures — not needed in the image +tests/ + +# Dev and internal docs +.docs/ +.claude/ + +# Infrastructure-as-code — not part of the application +infra/ + +# Runtime data — always mounted at runtime, never baked in +logs/ +data/ +conf.yaml + +# Secrets — never in the image +.env +.env.* diff --git a/.env.example b/.env.example index c9f2219..c674343 100644 --- a/.env.example +++ b/.env.example @@ -1,45 +1,97 @@ # ────────────────────────────────────────────────────────────────────────────── -# ARIA — Secrets template +# ARIA — Secrets and runtime env vars template # Copy this file to .env and fill in your values. Never commit .env to Git. # Non-secret configuration (model IDs, connector types, GCP settings, etc.) -# lives in conf.yaml — see conf_template.yaml. +# lives in conf.yaml — see conf_template.yaml for all options. # ────────────────────────────────────────────────────────────────────────────── + +# ── Required (all deployments) ──────────────────────────────────────────────── + # ServiceNow — password for the service account defined in conf.yaml (servicenow.user) SNOW_PASSWORD= -# Anthropic — API key for LLM calls across all agents -# Reference implementation uses Anthropic. Swap for your provider if you bring -# your own LLMClientInterface implementation. +# Slack — bot token with chat:write scope (channel set in conf.yaml slack.channel_id) +SLACK_BOT_TOKEN= + + +# ── LLM provider — set ONE block depending on llm.provider in conf.yaml ─────── + +# --- anthropic (llm.provider: anthropic) — recommended for non-GCP deployments ANTHROPIC_API_KEY= -# Slack — bot token with chat:write scope, for the channel defined in conf.yaml (slack.channel_id) -SLACK_BOT_TOKEN= +# --- vertex_ai (llm.provider: vertex_ai) — GCP container deployments (no API key needed) +# Auth is via ADC — set GOOGLE_APPLICATION_CREDENTIALS if not running on GKE/Cloud Run. +# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json # only if ADC is not auto-resolved +VERTEX_AI_PROJECT_ID= +VERTEX_AI_LOCATION=europe-west1 -# MS Teams (optional — alternative notifier; swap connector in dependencies.py) -TEAMS_WEBHOOK_URL= +# --- claude_code (llm.provider: claude_code) — local dev only, NOT for production (#84) +# No additional env vars needed; uses the local Claude Code CLI subscription. + + +# ── Vault backend — set ONE block depending on runtime.vault_backend in conf.yaml ─ + +# --- env (default) — secrets come from this .env file; no additional config needed. + +# --- gcp — GCP Secret Manager via ADC +GCP_PROJECT_ID= + +# --- hashicorp — HashiCorp Vault +VAULT_TOKEN= + +# --- aws — AWS Secrets Manager +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= -# Google Chat (optional — alternative notifier; swap connector in dependencies.py) -GOOGLE_CHAT_WEBHOOK_URL= + +# ── Pipeline behaviour ──────────────────────────────────────────────────────── + +# Path to a custom conf.yaml (default: ./conf.yaml relative to working directory) +# ARIA_CONFIG_PATH=/etc/aria/conf.yaml + +# Enable the built-in Alpine.js ops dashboard at /dashboard +# ARIA_DASHBOARD_ENABLED=true + +# Operating mode: inform | hitm | autonomous (only 'inform' is implemented in Phase 1.5) +# ARIA_OPERATING_MODE=inform + +# LLM provider override — overrides llm.provider in conf.yaml +# ARIA_LLM_PROVIDER=anthropic + +# Vault backend override — overrides runtime.vault_backend in conf.yaml +# ARIA_VAULT_BACKEND=env + +# Log format: human (coloured, for terminals) | json (for log aggregators) +# ARIA_LOG_FORMAT=human + +# Log directory for rolling file output +# ARIA_LOG_DIR=logs/ + +# SQLite run history database path +# ARIA_RUN_DB_PATH=data/runs.db + +# Dry-run mode — uses in-memory stubs; no real ServiceNow/Slack/SSH calls +# ARIA_DRY_RUN=false + + +# ── Optional connectors ─────────────────────────────────────────────────────── # CDP — SSH private key PEM content for Agent 2 log extraction from Cloudera CDP nodes # Set via: export CDP_SSH_KEY="$(cat /path/to/private_key)" CDP_SSH_KEY= -# CDP — SSH host public key for strict host verification (recommended, prevents MITM attacks) +# CDP — SSH host public key for strict host verification (prevents MITM attacks) # Format: " " e.g. "ssh-ed25519 AAAA..." -# If not set, ARIA falls back to WarningPolicy (logs a warning but still connects) +# Leave empty to use WarningPolicy (logs a warning but still connects) CDP_HOST_KEY= -# GCP — service account JSON key (base64-encoded) for BigQuery and GCS access -# Only required when connectors.log = gcp in conf.yaml +# GCP — service account JSON key (base64-encoded) for Cloud Logging / BigQuery access +# Only needed if NOT using ADC (e.g. running outside GCP with a SA key file) GCP_SA_KEY= -# AWS — credentials for the AWS Secrets Manager vault implementation -# Only required if you are using the AWS SM vault backend -AWS_ACCESS_KEY_ID= -AWS_SECRET_ACCESS_KEY= +# MS Teams (optional alternative notifier — swap connector in dependencies.py) +TEAMS_WEBHOOK_URL= -# HashiCorp Vault — token for the Vault vault implementation -# Only required if you are using the Vault backend -VAULT_TOKEN= +# Google Chat (optional alternative notifier — swap connector in dependencies.py) +GOOGLE_CHAT_WEBHOOK_URL= diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25ae23c..1fcf98b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,3 +52,31 @@ jobs: - name: pytest (unit) run: pytest tests/unit/ -v + + docker-smoke: + name: Docker build + smoke test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build -t aria:ci . + + - name: Smoke test + run: | + docker run -d --name aria-ci \ + -e ARIA_DRY_RUN=true \ + -e ARIA_LLM_PROVIDER=anthropic \ + -e ANTHROPIC_API_KEY=dummy \ + -p 8000:8000 aria:ci + # Wait for the API to boot (health check starts after start_period). + for i in $(seq 1 15); do + if curl -sf http://localhost:8000/api/v1/health; then + echo "Health check passed" + break + fi + echo "Waiting... ($i/15)" + sleep 2 + done + curl -sf http://localhost:8000/api/v1/health + docker stop aria-ci diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a9e49c7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.11-slim + +# curl is needed for the HEALTHCHECK command below. +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Create a non-root user before copying any files. +RUN adduser --disabled-password --uid 1000 aria + +# Install dependencies first so this layer is cached when only source changes. +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application source. +COPY . . + +RUN chown -R aria:aria /app +USER aria + +EXPOSE 8000 + +# Health check hits the /health endpoint — fails fast if the API is down. +HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ + CMD curl -f http://localhost:8000/api/v1/health || exit 1 + +CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index c1ed4c8..e46a5d9 100644 --- a/README.md +++ b/README.md @@ -549,6 +549,51 @@ uvicorn api.main:app --reload --- +## Deployment + +ARIA ships as a single Docker image. No Python installation is required on the target machine — only Docker (local/VM) or a Kubernetes cluster (production). The same image works across all environments; what changes is how `conf.yaml` and secrets are injected. + +### Docker (local machine or VM) + +```bash +# 1. Build +docker build -t aria:latest . + +# 2. Run — mount your conf.yaml; pass secrets as env vars +docker run -d \ + --name aria \ + -p 8000:8000 \ + -v /path/to/conf.yaml:/etc/aria/conf.yaml:ro \ + -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \ + -e SNOW_PASSWORD= \ + -e ANTHROPIC_API_KEY= \ + -e SLACK_BOT_TOKEN= \ + aria:latest + +# 3. Verify +curl http://localhost:8000/api/v1/health +``` + +### Kubernetes + +`conf.yaml` is delivered via a ConfigMap; secrets via a Kubernetes Secret or GCP Secret Manager (Workload Identity, no API key in the pod): + +```bash +kubectl create namespace aria +kubectl create configmap aria-config --from-file=conf.yaml=./conf.yaml -n aria +kubectl create secret generic aria-secrets \ + --from-literal=SNOW_PASSWORD= \ + --from-literal=ANTHROPIC_API_KEY= \ + --from-literal=SLACK_BOT_TOKEN= \ + -n aria +``` + +Then apply a Deployment that mounts the ConfigMap at `/etc/aria/conf.yaml` and sets `ARIA_CONFIG_PATH=/etc/aria/conf.yaml`. For GCP clusters, set `llm.provider: vertex_ai` and `runtime.vault_backend: gcp` in `conf.yaml` — the pod authenticates via Workload Identity with no credentials in the container. + +**Full guide** (conf.yaml preparation, docker-compose, GKE Deployment + Service YAML, LLM provider selection, vault backend options): [documentation/guides/installation.md](documentation/guides/installation.md) + +--- + ## Acceptance criteria (Phase 1) Phase 1 is complete when all of the following pass on 10 consecutive test incidents: @@ -580,8 +625,8 @@ Phase 1 is complete when all of the following pass on 10 consecutive test incide | Phase 1 | S8: ReAct loop trigger — cross-service log requests | ✅ Done | | Phase 1 | M7: Acceptance criteria validated on local environment | ✅ Done | | Phase 1.5 | S1: Structured logging — structlog, `run_id`, lifecycle events, RunRecord | ✅ Done | -| **Phase 1.5** | **S2: Monitoring foundation — run store, REST API, Alpine.js dashboard, mode scaffold** | 🔜 Next | -| Phase 1.5 | S3: Docker + `ARIA_CONFIG_PATH` + `VertexAILLMClient` + LLM provider DI | 🔜 Planned | +| Phase 1.5 | S2: Monitoring foundation — run store, REST API, Alpine.js dashboard, mode scaffold | ✅ Done | +| Phase 1.5 | S3: Docker + `ARIA_CONFIG_PATH` + `VertexAILLMClient` + LLM provider DI (incl. #84 security fix) | ✅ Done | | Phase 1.5 | S4: Testing infrastructure — UC1/UC2/UC3 cluster wiring, KB runbooks, CMDB validation | 🔜 Planned | | Phase 1.5 | S5: Round 2 acceptance testing — 30 incidents on UC1 + UC2 real infrastructure | 🔜 Planned | | Phase 1.5 | S6: GCP native connectors — BQ, Cloud Functions, Pub/Sub, GCS | 🔜 Planned | diff --git a/api/dependencies.py b/api/dependencies.py index 97a4eec..12ce1fd 100644 --- a/api/dependencies.py +++ b/api/dependencies.py @@ -15,14 +15,15 @@ from core.agents.incident_reader import IncidentReaderAgent from core.agents.log_extractor import LogExtractorAgent from core.agents.notifier import NotifierAgent +from core.interfaces.llm_client import LLMClientInterface from core.interfaces.run_state_store import RunStateStoreInterface from core.interfaces.run_store import RunStoreInterface +from core.interfaces.vault import VaultInterface from core.models import PlatformTag from core.orchestrator.pipeline import ARIAPipeline from implementations.clusters.cloud.gcp.log_connector import GCPLogConnector from implementations.clusters.onprem.log_connector import SSHLogConnector from implementations.itsm.servicenow.connector import ServiceNowConnector -from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient as LLMClient from implementations.storage.memory_run_state_store import InMemoryRunStateStore from implementations.storage.sqlite_run_store import SQLiteRunStore from implementations.vault.envvar import EnvVarVault @@ -33,6 +34,51 @@ def _resolve_model(agent_num: str) -> str | None: return cfg.resolve_model(agent_num) +def _get_llm_client(model: str) -> LLMClientInterface: + """Instantiate the LLM client for the configured provider. + + Provider is read from llm.provider in conf.yaml / ARIA_LLM_PROVIDER env var. + Defaults to 'anthropic' — direct API, no local tool access (#84 security fix). + 'claude_code' is available but must be explicitly opted into in conf.yaml. + + Raises: + ValueError: If the provider name is not recognised. + """ + provider = cfg.llm_provider() + if provider == "anthropic": + from implementations.llm.anthropic.llm_client import AnthropicLLMClient + + return AnthropicLLMClient(model=model) + if provider == "claude_code": + from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient + + return ClaudeCodeLLMClient(model=model) + if provider == "vertex_ai": + from implementations.llm.vertex_ai.llm_client import VertexAILLMClient + + project_id = cfg.gcp_project_id() + location = cfg.gcp_region() + return VertexAILLMClient(model=model, project_id=project_id, location=location) + raise ValueError( + f"Unknown llm.provider: '{provider}'. Must be one of: anthropic | claude_code | vertex_ai" + ) + + +def _get_vault() -> VaultInterface: + """Instantiate the vault backend for the configured secret store. + + Backend is read from runtime.vault_backend in conf.yaml / ARIA_VAULT_BACKEND env var. + Defaults to 'env' (EnvVarVault) — reads secrets from environment variables. + """ + backend = cfg.vault_backend() + if backend == "gcp": + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault + + return GCPSecretManagerVault.from_env() + # hashicorp, aws, azure already have implementations — wire them here as they get used. + return EnvVarVault() + + @lru_cache(maxsize=1) def get_run_store() -> RunStoreInterface: """Build and cache the after-action run history store (P1.5 S2). @@ -57,7 +103,7 @@ def get_run_state_store() -> RunStateStoreInterface: def get_agent1() -> IncidentReaderAgent: """Build and cache the Agent 1 (Incident Reader) instance. - Injects ServiceNow connector, Claude Code LLM client, and optionally the + Injects ServiceNow connector, the configured LLM client, and optionally the CMDBResolver when SNOW credentials are present. CMDBResolver absence is non-fatal — Agent 1 falls back to Path 3 (LLM-only) for CI resolution. @@ -70,7 +116,7 @@ def get_agent1() -> IncidentReaderAgent: raise ValueError( "ARIA_AGENT1_MODEL env var is not set (or ARIA_GLOBAL_MODEL when ARIA_LLM_MODE=global)" ) - llm = LLMClient(model=model) + llm = _get_llm_client(model) # Inject CMDBResolver when SNOW vars are present — enables Path 1 and Path 2 # CI resolution. Without it, every incident falls through to Path 3 (LLM). @@ -98,7 +144,7 @@ def get_agent3() -> ClassifierAgent: "ARIA_AGENT3_MODEL env var is not set " "(or ARIA_GLOBAL_MODEL when ARIA_LLM_MODE=global)" ) - return ClassifierAgent(llm_client=LLMClient(model=model)) + return ClassifierAgent(llm_client=_get_llm_client(model)) @lru_cache(maxsize=1) @@ -117,7 +163,7 @@ def get_agent4() -> NotifierAgent: llm = None model = cfg.llm_agent_model("4") if model: - llm = LLMClient(model=model) + llm = _get_llm_client(model) return NotifierAgent( communicator=SlackConnector(token=token, channel_id=channel), @@ -148,7 +194,7 @@ def get_pipeline() -> "ARIAPipeline": ) agent1 = IncidentReaderAgent( connector=InMemoryConnector(fixture_path=Path("tests/fixtures/sample_incidents.json")), - llm_client=LLMClient(model=model1), + llm_client=_get_llm_client(model1), ) agent2 = LogExtractorAgent( connector_registry={ @@ -163,7 +209,7 @@ def get_pipeline() -> "ARIAPipeline": agent1 = get_agent1() agent2 = get_agent2() model3 = _resolve_model("3") - agent3 = ClassifierAgent(llm_client=LLMClient(model=model3) if model3 else None) + agent3 = ClassifierAgent(llm_client=_get_llm_client(model3) if model3 else None) agent4 = get_agent4() # Monitoring stores (P1.5 S2): the API pipeline always records run history @@ -187,7 +233,7 @@ def get_agent2() -> LogExtractorAgent: and return empty results gracefully if credentials are absent. Injects an LLM client for query planning if ARIA_AGENT2_MODEL is set. """ - vault = EnvVarVault() + vault = _get_vault() # Both connectors call vault.get_secret() only at query time — construction # never fails. Missing credentials surface as graceful empty results at # request time rather than crashing the API on startup. @@ -204,5 +250,5 @@ def get_agent2() -> LogExtractorAgent: llm = None model = _resolve_model("2") if model: - llm = LLMClient(model=model) + llm = _get_llm_client(model) return LogExtractorAgent(connector_registry=registry, llm_client=llm) diff --git a/conf_template.yaml b/conf_template.yaml index 9aaa9a0..fc8b2f3 100644 --- a/conf_template.yaml +++ b/conf_template.yaml @@ -14,6 +14,12 @@ runtime: operating_mode: inform # inform | hitm | autonomous — only 'inform' is implemented # in Phase 1.5; the others raise NotImplementedError until # their phase ships (hitm: Phase 2, autonomous: Phase 3) + vault_backend: env # env | gcp | hashicorp | aws | azure + # 'env' reads secrets from environment variables (local dev) + # 'gcp' uses GCP Secret Manager via ADC (container deployments) + log_format: human # human | json — human is coloured structlog for terminals; + # json is machine-readable for log aggregators (Splunk, ELK) + log_dir: logs/ # directory for rolling log files (overridden by ARIA_LOG_DIR) runs: db_path: data/runs.db # SQLite file for run history (monitoring API + dashboard) @@ -25,6 +31,10 @@ servicenow: cmdb_rel_type: "Members::Member of" # override if your instance uses a different type llm: + provider: anthropic # anthropic | claude_code | vertex_ai + # anthropic: direct Anthropic API — needs ANTHROPIC_API_KEY + # claude_code: local Claude Code CLI — local dev only (security risk in prod, #84) + # vertex_ai: GCP Vertex AI via ADC — use for container deployments mode: modular # modular | global global_model: "" # used only when mode=global, e.g. claude-sonnet-4-6 agents: diff --git a/core/config.py b/core/config.py index 004c438..bb70801 100644 --- a/core/config.py +++ b/core/config.py @@ -1,9 +1,11 @@ """Runtime configuration loader. -Reads non-secret configuration from conf.yaml (project root). +Reads non-secret configuration from conf.yaml. The path defaults to +conf.yaml in the working directory but can be overridden by setting +ARIA_CONFIG_PATH (e.g. to /etc/aria/conf.yaml when mounted via ConfigMap). Falls back to environment variables when conf.yaml is absent (CI, Docker). Secrets (passwords, API keys, tokens) are never read here — they come from -the process environment injected by Infisical or a local .env file. +the process environment injected by Infisical or a vault implementation. """ import os @@ -13,13 +15,14 @@ @lru_cache(maxsize=1) def _raw() -> dict: - """Load and cache the contents of conf.yaml. + """Load and cache the contents of the config file. + The path is resolved once from ARIA_CONFIG_PATH (default: conf.yaml). Returns an empty dict if the file does not exist or cannot be parsed, so callers can always fall back to environment variables without crashing. - The lru_cache ensures we only read the file once per process lifetime. + Call _raw.cache_clear() in tests when switching ARIA_CONFIG_PATH. """ - path = Path("conf.yaml") + path = Path(os.environ.get("ARIA_CONFIG_PATH", "conf.yaml")) if not path.exists(): return {} try: @@ -109,6 +112,27 @@ def resolve_model(agent_num: str) -> str | None: return llm_agent_model(agent_num) +def llm_provider() -> str: + """Return the LLM provider to use for all agents. + + Values: 'anthropic' (default) | 'claude_code' | 'vertex_ai'. + - anthropic: direct Anthropic API — requires ANTHROPIC_API_KEY. + - claude_code: local Claude Code CLI — safe for local dev only (#84). + - vertex_ai: GCP Vertex AI via ADC — no API key needed in container. + Can be set via llm.provider in conf.yaml or ARIA_LLM_PROVIDER env var. + """ + return _get(["llm", "provider"], "ARIA_LLM_PROVIDER", "anthropic") + + +def vault_backend() -> str: + """Return the vault backend for secret retrieval. + + Values: 'env' (default) | 'gcp' | 'hashicorp' | 'aws' | 'azure'. + Can be set via runtime.vault_backend in conf.yaml or ARIA_VAULT_BACKEND env var. + """ + return _get(["runtime", "vault_backend"], "ARIA_VAULT_BACKEND", "env") + + # ── CDP ─────────────────────────────────────────────────────────────────────── diff --git a/deployment/README.md b/deployment/README.md new file mode 100644 index 0000000..167879f --- /dev/null +++ b/deployment/README.md @@ -0,0 +1,190 @@ +# ARIA — Deployment Guide + +ARIA ships as a single Docker image. This guide covers four deployment patterns. +All patterns use the same image; what changes is how config and secrets are injected. + +--- + +## Prerequisites + +- Docker ≥ 24 +- A `conf.yaml` (copy `conf_template.yaml` from the project root and fill in values) +- Secrets in environment variables or a vault backend (see `.env.example`) + +--- + +## Pattern 1 — Docker CLI (quickstart) + +```bash +docker build -t aria:latest . + +docker run -d \ + --name aria \ + -p 8000:8000 \ + -v /path/to/your/conf.yaml:/etc/aria/conf.yaml:ro \ + -v aria_logs:/var/log/aria \ + -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \ + -e ARIA_LOG_DIR=/var/log/aria \ + -e SNOW_PASSWORD= \ + -e ANTHROPIC_API_KEY= \ + -e SLACK_BOT_TOKEN= \ + aria:latest + +# Verify +curl http://localhost:8000/api/v1/health +``` + +--- + +## Pattern 2 — docker compose (monolithic) + +```bash +cd deployment/monolithic + +# Copy and fill in the config +cp conf.yaml.example conf.yaml +# Edit conf.yaml with your ServiceNow, GCP, and Slack settings + +# Set secrets in environment (or a .env file in this directory) +export SNOW_PASSWORD=... +export ANTHROPIC_API_KEY=... +export SLACK_BOT_TOKEN=... + +docker compose up -d + +# Tail logs +docker compose logs -f aria +``` + +The compose file bind-mounts `./conf.yaml` to `/etc/aria/conf.yaml` inside the container and uses a named volume for log persistence. + +--- + +## Pattern 3 — Cloud Run + +```bash +# Build and push +docker build -t gcr.io//aria:latest . +docker push gcr.io//aria:latest + +# Deploy (secrets via Secret Manager — set runtime.vault_backend: gcp in conf.yaml) +gcloud run deploy aria \ + --image gcr.io//aria:latest \ + --region europe-west1 \ + --set-env-vars ARIA_CONFIG_PATH=/etc/aria/conf.yaml \ + --set-env-vars ARIA_LLM_PROVIDER=vertex_ai \ + --set-env-vars GCP_PROJECT_ID= \ + --set-secrets SNOW_PASSWORD=aria-snow-password:latest \ + --set-secrets SLACK_BOT_TOKEN=aria-slack-bot-token:latest \ + --memory 2Gi \ + --cpu 2 \ + --no-allow-unauthenticated + +# For conf.yaml: mount via a Cloud Storage FUSE volume or bake into the image at build time. +# The simplest option for Cloud Run is to set ARIA_CONFIG_PATH and pass config as env vars +# (every conf.yaml key has an env var fallback — see conf_template.yaml for the mapping). +``` + +--- + +## Pattern 4 — GKE (ConfigMap pattern) + +Create a ConfigMap from your `conf.yaml`: + +```bash +kubectl create configmap aria-config \ + --from-file=conf.yaml=./conf.yaml \ + --namespace aria +``` + +Deployment snippet (adjust image, replicas, and secret names to your cluster): + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aria + namespace: aria +spec: + replicas: 1 + selector: + matchLabels: + app: aria + template: + metadata: + labels: + app: aria + spec: + serviceAccountName: aria-sa # must have roles/aiplatform.user + roles/secretmanager.secretAccessor + containers: + - name: aria + image: gcr.io//aria:latest + ports: + - containerPort: 8000 + env: + - name: ARIA_CONFIG_PATH + value: /etc/aria/conf.yaml + - name: ARIA_LOG_DIR + value: /var/log/aria + - name: ARIA_LLM_PROVIDER + value: vertex_ai + - name: GCP_PROJECT_ID + value: + - name: ARIA_VAULT_BACKEND + value: gcp + volumeMounts: + - name: config + mountPath: /etc/aria + readOnly: true + - name: logs + mountPath: /var/log/aria + livenessProbe: + httpGet: + path: /api/v1/health + port: 8000 + initialDelaySeconds: 15 + periodSeconds: 30 + volumes: + - name: config + configMap: + name: aria-config + - name: logs + emptyDir: {} +``` + +Apply: + +```bash +kubectl apply -f aria-deployment.yaml +kubectl rollout status deployment/aria -n aria +``` + +--- + +## LLM provider selection + +Set `llm.provider` in `conf.yaml` or override with `ARIA_LLM_PROVIDER`: + +| Provider | Value | Auth | Use case | +|---|---|---|---| +| Anthropic API | `anthropic` | `ANTHROPIC_API_KEY` | Default — any non-GCP deployment | +| GCP Vertex AI | `vertex_ai` | ADC (no API key) | GKE, Cloud Run — recommended for GCP | +| Claude Code CLI | `claude_code` | Local subscription | Local dev only — **not for production** (#84) | + +For Vertex AI, the model ID in `conf.yaml` selects the model family: +- Claude-on-Vertex: `claude-sonnet@20250201` +- Gemini: `gemini-2.0-flash`, `gemini-2.5-pro` + +--- + +## Vault backend selection + +Set `runtime.vault_backend` in `conf.yaml` or override with `ARIA_VAULT_BACKEND`: + +| Backend | Value | Auth | +|---|---|---| +| Environment variables | `env` (default) | None — reads from process env | +| GCP Secret Manager | `gcp` | ADC — needs `GCP_PROJECT_ID` env var | +| HashiCorp Vault | `hashicorp` | `VAULT_TOKEN` env var | +| AWS Secrets Manager | `aws` | `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | +| Azure Key Vault | `azure` | Azure SDK credential chain | diff --git a/deployment/monolithic/conf.yaml.example b/deployment/monolithic/conf.yaml.example new file mode 100644 index 0000000..bc09e0c --- /dev/null +++ b/deployment/monolithic/conf.yaml.example @@ -0,0 +1,52 @@ +# ────────────────────────────────────────────────────────────────────────────── +# ARIA — monolithic deployment config example +# Copy to conf.yaml in this directory and fill in your values. +# This file is bind-mounted at /etc/aria/conf.yaml inside the container. +# ────────────────────────────────────────────────────────────────────────────── + +runtime: + operating_mode: inform # only 'inform' is implemented in Phase 1.5 + vault_backend: env # 'env' reads secrets from environment variables + log_format: json # json is best for container log aggregators + log_dir: /var/log/aria # matches the volume mount in docker-compose.yml + +runs: + db_path: /var/log/aria/runs.db # store run history alongside logs + +servicenow: + instance: .service-now.com + user: + assignment_group: + cmdb_rel_type: "Members::Member of" + +llm: + provider: anthropic # anthropic | claude_code | vertex_ai + mode: global + global_model: claude-sonnet-4-6 + agents: + agent1: "" + agent2: "" # leave empty to use static routing in Agent 2 + agent3: "" + agent4: "" + +connectors: + log: gcp # gcp | memory (use memory only for dry-run/testing) + queue: memory + state_store: memory + +gcp: + project_id: + region: europe-west1 + gcs_bucket_logs: + bq_log_dataset: + +cdp: + ssh_user: hadoop + log_dirs: + - /var/log/hadoop-hdfs + - /var/log/hadoop-yarn + - /var/log/hive + - /var/log/spark + +slack: + channel_id: diff --git a/deployment/monolithic/docker-compose.yml b/deployment/monolithic/docker-compose.yml new file mode 100644 index 0000000..383f0c3 --- /dev/null +++ b/deployment/monolithic/docker-compose.yml @@ -0,0 +1,40 @@ +services: + aria: + build: + context: ../.. # project root + dockerfile: Dockerfile + ports: + - "8000:8000" + environment: + # Config file path — the conf.yaml below is bind-mounted here. + ARIA_CONFIG_PATH: /etc/aria/conf.yaml + + # Log directory — written to the named volume below. + ARIA_LOG_DIR: /var/log/aria + + # Enable the built-in ops dashboard. + ARIA_DASHBOARD_ENABLED: "true" + + # Secrets — passed as env vars (EnvVarVault default). + # Replace these with real values or use a .env file (not committed). + SNOW_PASSWORD: "${SNOW_PASSWORD}" + ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY}" + SLACK_BOT_TOKEN: "${SLACK_BOT_TOKEN}" + + volumes: + # Operator-provided config file (copy from conf.yaml.example and fill in). + - ./conf.yaml:/etc/aria/conf.yaml:ro + + # Persistent log storage. + - aria_logs:/var/log/aria + + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"] + interval: 30s + timeout: 10s + start_period: 15s + retries: 3 + +volumes: + aria_logs: diff --git a/documentation/guides/installation.md b/documentation/guides/installation.md new file mode 100644 index 0000000..1abfd18 --- /dev/null +++ b/documentation/guides/installation.md @@ -0,0 +1,281 @@ +# Installation Guide + +ARIA ships as a single Docker image. There is no Python installation required on the target machine — only Docker (local) or a Kubernetes cluster (production). + +Two deployment paths are covered here: + +- **Docker** — run ARIA on a local machine or a single VM +- **Kubernetes** — run ARIA on a GKE cluster (or any Kubernetes distribution) + +Both paths share the same image and the same `conf.yaml` configuration file. What changes is how that config and the required secrets are injected at runtime. + +--- + +## Prerequisites + +### Both paths + +- A `conf.yaml` — copy `conf_template.yaml` from the project root and fill in your values +- Credentials for the services ARIA connects to (ServiceNow, Slack, and an LLM provider) — see `.env.example` for the full variable list + +### Docker path + +- Docker ≥ 24 installed on the target machine + +### Kubernetes path + +- A running Kubernetes cluster (GKE, EKS, AKS, or self-managed) +- `kubectl` configured to point at the target cluster +- A container registry the cluster can pull from (GCR, Artifact Registry, ECR, GHCR) + +--- + +## Prepare your conf.yaml + +Start from the project template: + +```bash +cp conf_template.yaml conf.yaml +``` + +Minimum fields to fill in before ARIA will start: + +```yaml +servicenow: + instance: dev382816 # your ServiceNow instance subdomain + user: admin + +slack: + channel_id: C0123456789 # target notification channel + +llm: + provider: anthropic # anthropic | vertex_ai | claude_code (local dev only) + model: claude-sonnet-4-6 + +runtime: + vault_backend: env # env | gcp (see Vault backend section below) + log_dir: /var/log/aria +``` + +`conf.yaml` is **never baked into the image** — it is always injected at runtime as a volume mount or ConfigMap. This means the same image works across environments without a rebuild. + +--- + +## Path 1 — Docker (local machine or VM) + +### 1. Build the image + +From the project root: + +```bash +docker build -t aria:latest . +``` + +### 2. Run the container + +```bash +docker run -d \ + --name aria \ + -p 8000:8000 \ + -v /absolute/path/to/your/conf.yaml:/etc/aria/conf.yaml:ro \ + -v aria_logs:/var/log/aria \ + -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \ + -e ARIA_LOG_DIR=/var/log/aria \ + -e SNOW_PASSWORD= \ + -e ANTHROPIC_API_KEY= \ + -e SLACK_BOT_TOKEN= \ + aria:latest +``` + +The `conf.yaml` is mounted read-only. Secrets are passed as environment variables (not stored in the config file). + +### 3. Verify + +```bash +curl http://localhost:8000/api/v1/health +# Expected: {"status": "ok"} +``` + +### Optional: Docker Compose + +If you prefer Compose, a ready-to-use file is available under `deployment/monolithic/`: + +```bash +cd deployment/monolithic +cp conf.yaml.example conf.yaml # fill in your values + +export SNOW_PASSWORD=... +export ANTHROPIC_API_KEY=... +export SLACK_BOT_TOKEN=... + +docker compose up -d +docker compose logs -f aria +``` + +--- + +## Path 2 — Kubernetes + +### 1. Build and push the image + +```bash +# Replace and with your values +docker build -t //aria:latest . +docker push //aria:latest +``` + +For GKE with Artifact Registry: + +```bash +docker build -t europe-west1-docker.pkg.dev//aria/aria:latest . +docker push europe-west1-docker.pkg.dev//aria/aria:latest +``` + +### 2. Create the namespace and ConfigMap + +```bash +kubectl create namespace aria + +kubectl create configmap aria-config \ + --from-file=conf.yaml=./conf.yaml \ + --namespace aria +``` + +### 3. Create the secrets + +```bash +kubectl create secret generic aria-secrets \ + --from-literal=SNOW_PASSWORD= \ + --from-literal=ANTHROPIC_API_KEY= \ + --from-literal=SLACK_BOT_TOKEN= \ + --namespace aria +``` + +For GCP deployments using Vertex AI and Secret Manager, grant the pod's service account the necessary IAM roles instead of mounting API keys — see the [LLM provider section](#llm-provider-selection) below. + +### 4. Deploy + +Save the following as `aria-deployment.yaml` and adjust the image path, project ID, and replica count: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aria + namespace: aria +spec: + replicas: 1 + selector: + matchLabels: + app: aria + template: + metadata: + labels: + app: aria + spec: + # For GCP: bind a Kubernetes service account to a GCP service account via Workload Identity + # serviceAccountName: aria-sa + containers: + - name: aria + image: //aria:latest + ports: + - containerPort: 8000 + env: + - name: ARIA_CONFIG_PATH + value: /etc/aria/conf.yaml + - name: ARIA_LOG_DIR + value: /var/log/aria + envFrom: + - secretRef: + name: aria-secrets + volumeMounts: + - name: config + mountPath: /etc/aria + readOnly: true + - name: logs + mountPath: /var/log/aria + livenessProbe: + httpGet: + path: /api/v1/health + port: 8000 + initialDelaySeconds: 15 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /api/v1/health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 + volumes: + - name: config + configMap: + name: aria-config + - name: logs + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: aria + namespace: aria +spec: + selector: + app: aria + ports: + - port: 80 + targetPort: 8000 + type: ClusterIP +``` + +Apply and verify: + +```bash +kubectl apply -f aria-deployment.yaml +kubectl rollout status deployment/aria -n aria +kubectl get pods -n aria +``` + +### 5. Access the API + +From within the cluster, ARIA is reachable at `http://aria.aria.svc.cluster.local/api/v1/health`. + +To expose it externally, add an Ingress resource pointing to the `aria` Service on port 80. + +--- + +## LLM provider selection + +Set `llm.provider` in `conf.yaml` or override with the `ARIA_LLM_PROVIDER` environment variable: + +| Provider | `llm.provider` value | Auth required | Recommended for | +|---|---|---|---| +| Anthropic API | `anthropic` (default) | `ANTHROPIC_API_KEY` env var | Any non-GCP deployment | +| GCP Vertex AI | `vertex_ai` | ADC via Workload Identity — no API key in pod | GKE, Cloud Run | +| Claude Code CLI | `claude_code` | Local Claude subscription | Local dev only — **not for production** | + +For Vertex AI, the model name in `conf.yaml` selects the model family: + +```yaml +llm: + provider: vertex_ai + model: claude-sonnet@20250201 # Claude-on-Vertex + # model: gemini-2.0-flash # Gemini +``` + +The pod's service account must have `roles/aiplatform.user` on the GCP project. With Workload Identity, no API key or credential file is needed in the container. + +--- + +## Vault / secrets backend + +ARIA can read secrets from different backends depending on the environment. Set `runtime.vault_backend` in `conf.yaml` or override with `ARIA_VAULT_BACKEND`: + +| Backend | Value | How secrets are supplied | +|---|---|---| +| Environment variables | `env` (default) | Pass secrets as `-e KEY=value` in `docker run` or as a Kubernetes Secret | +| GCP Secret Manager | `gcp` | ADC — needs `GCP_PROJECT_ID` env var; pod service account needs `roles/secretmanager.secretAccessor` | +| HashiCorp Vault | `hashicorp` | `VAULT_TOKEN` and `VAULT_ADDR` env vars | +| AWS Secrets Manager | `aws` | `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` env vars (or instance profile) | +| Azure Key Vault | `azure` | Azure SDK credential chain | + +For local and staging deployments, `env` (the default) is the simplest option. For production GCP deployments, `gcp` removes the need to manage credentials manually — the pod authenticates via Workload Identity. diff --git a/documentation/index.md b/documentation/index.md index 56b28fe..25b92f8 100644 --- a/documentation/index.md +++ b/documentation/index.md @@ -28,4 +28,5 @@ ARIA automates all four steps. Phase 1 is **notify-only**: ARIA presents its fin - [Agent descriptions](architecture/agents.md) - [Interface & plugin system](architecture/interfaces.md) - [Core data models](architecture/data-models.md) -- [Getting started](guides/getting-started.md) +- [Getting started (local dev)](guides/getting-started.md) +- [Installation guide (Docker + Kubernetes)](guides/installation.md) diff --git a/implementations/llm/vertex_ai/__init__.py b/implementations/llm/vertex_ai/__init__.py new file mode 100644 index 0000000..b2b105b --- /dev/null +++ b/implementations/llm/vertex_ai/__init__.py @@ -0,0 +1,3 @@ +from implementations.llm.vertex_ai.llm_client import VertexAILLMClient + +__all__ = ["VertexAILLMClient"] diff --git a/implementations/llm/vertex_ai/llm_client.py b/implementations/llm/vertex_ai/llm_client.py new file mode 100644 index 0000000..501b1a6 --- /dev/null +++ b/implementations/llm/vertex_ai/llm_client.py @@ -0,0 +1,209 @@ +"""Vertex AI implementation of LLMClientInterface. + +Supports two model families, detected by model name prefix: + - Claude-on-Vertex (model starts with "claude"): uses anthropic.AnthropicVertex, + which authenticates via Application Default Credentials (ADC) against Vertex AI. + - Gemini (all other model names): uses google-cloud-aiplatform SDK, + also via ADC. + +Auth: ADC only — no API key required in the container. Grant the service account + roles/aiplatform.user on the GCP project and ADC will resolve credentials + automatically on GKE, Cloud Run, and Compute Engine. + +Example: + client = VertexAILLMClient( + model="claude-sonnet@20250201", + project_id="my-project", + location="europe-west1", + ) +""" + +import time + +import anthropic + +from core.exceptions import LLMAuthError, LLMResponseError, LLMUnavailableError +from core.interfaces.llm_client import LLMClientInterface +from core.observability import EVENT_LLM_CALL_COMPLETED, get_logger, record_llm_tokens + +logger = get_logger(__name__) + + +class VertexAILLMClient(LLMClientInterface): + """LLMClientInterface backed by GCP Vertex AI (ADC auth, no API key).""" + + def __init__(self, model: str, project_id: str, location: str = "europe-west1") -> None: + """Initialise the Vertex AI client. + + Args: + model: Model identifier. Claude models: 'claude-sonnet@20250201'. + Gemini models: 'gemini-2.0-flash', 'gemini-2.5-pro'. + project_id: GCP project ID that hosts the Vertex AI endpoint. + location: GCP region for the endpoint (default: europe-west1). + Reads from gcp.region in conf.yaml or GCP_REGION env var. + + Raises: + LLMAuthError: If ADC credentials cannot be resolved at construction time. + ImportError: If google-cloud-aiplatform or anthropic[vertex] are not installed. + """ + self._model = model + self._project_id = project_id + self._location = location + self._is_claude = model.startswith("claude") + + def complete( + self, + messages: list[dict[str, str]], + max_tokens: int = 1024, + temperature: float = 0.0, + system: str | None = None, + ) -> str: + """Send messages to Vertex AI and return the response text. + + Routes to AnthropicVertex for Claude models, or to the Gemini SDK + for all other model names. + + Raises: + LLMAuthError: ADC credentials missing or permission denied. + LLMUnavailableError: Network error or 5xx from Vertex AI. + LLMResponseError: Empty or unparseable response. + """ + if self._is_claude: + return self._complete_claude(messages, max_tokens, temperature, system) + return self._complete_gemini(messages, max_tokens, temperature, system) + + # ── Claude-on-Vertex ────────────────────────────────────────────────────── + + def _complete_claude( + self, + messages: list[dict[str, str]], + max_tokens: int, + temperature: float, + system: str | None, + ) -> str: + start = time.monotonic() + try: + client = anthropic.AnthropicVertex( + project_id=self._project_id, + region=self._location, + ) + kwargs: dict = { + "model": self._model, + "max_tokens": max_tokens, + "messages": messages, + } + if system: + kwargs["system"] = system + + response = client.messages.create(**kwargs) + + except anthropic.AuthenticationError as exc: + raise LLMAuthError(f"Vertex AI ADC credentials rejected: {exc}") from exc + except anthropic.APIConnectionError as exc: + raise LLMUnavailableError(f"Vertex AI connection error: {exc}") from exc + except anthropic.APIStatusError as exc: + raise LLMUnavailableError(f"Vertex AI API error {exc.status_code}: {exc}") from exc + + usage = getattr(response, "usage", None) + tokens_in = getattr(usage, "input_tokens", None) + tokens_out = getattr(usage, "output_tokens", None) + record_llm_tokens(tokens_in, tokens_out) + logger.info( + EVENT_LLM_CALL_COMPLETED, + model=self._model, + provider="vertex_ai_claude", + tokens_in=tokens_in, + tokens_out=tokens_out, + duration_ms=int((time.monotonic() - start) * 1000), + ) + + if not response.content: + raise LLMResponseError("Vertex AI (Claude) returned an empty response") + + return response.content[0].text + + # ── Gemini ──────────────────────────────────────────────────────────────── + + def _complete_gemini( + self, + messages: list[dict[str, str]], + max_tokens: int, + temperature: float, + system: str | None, + ) -> str: + try: + import vertexai + from vertexai.generative_models import Content, GenerationConfig, GenerativeModel, Part + except ImportError as exc: + raise ImportError( + "google-cloud-aiplatform package is required for Gemini models. " + "Install with: pip install google-cloud-aiplatform" + ) from exc + + try: + import google.auth.exceptions as google_auth_exc + except ImportError: + google_auth_exc = None # type: ignore[assignment] + + start = time.monotonic() + try: + vertexai.init(project=self._project_id, location=self._location) + + # Convert messages to Vertex AI Content objects. + # Gemini role names are "user" and "model" (not "assistant"). + contents = [] + for msg in messages: + role = "model" if msg["role"] == "assistant" else "user" + contents.append(Content(role=role, parts=[Part.from_text(msg["content"])])) + + # Prepend system instruction as a leading user turn when provided, + # since Gemini handles system prompts as a constructor argument. + model = GenerativeModel( + model_name=self._model, + system_instruction=system if system else None, + ) + + gen_config = GenerationConfig( + max_output_tokens=max_tokens, + temperature=temperature, + ) + response = model.generate_content(contents, generation_config=gen_config) + + except Exception as exc: + # Map Google auth errors to LLMAuthError. + exc_name = type(exc).__name__ + if google_auth_exc and isinstance(exc, google_auth_exc.DefaultCredentialsError): + raise LLMAuthError(f"GCP ADC credentials not found: {exc}") from exc + # PermissionDenied / 403 → auth. + if "PermissionDenied" in exc_name or "403" in str(exc): + raise LLMAuthError(f"Vertex AI permission denied: {exc}") from exc + # ServiceUnavailable / network errors → unavailable. + if "ServiceUnavailable" in exc_name or "Unavailable" in exc_name: + raise LLMUnavailableError(f"Vertex AI unavailable: {exc}") from exc + raise LLMUnavailableError(f"Vertex AI error: {exc}") from exc + + # Gemini does not expose token counts in the same way; best-effort. + usage_meta = getattr(response, "usage_metadata", None) + tokens_in = getattr(usage_meta, "prompt_token_count", None) + tokens_out = getattr(usage_meta, "candidates_token_count", None) + record_llm_tokens(tokens_in, tokens_out) + logger.info( + EVENT_LLM_CALL_COMPLETED, + model=self._model, + provider="vertex_ai_gemini", + tokens_in=tokens_in, + tokens_out=tokens_out, + duration_ms=int((time.monotonic() - start) * 1000), + ) + + try: + text = response.text + except Exception as exc: + raise LLMResponseError( + f"Vertex AI (Gemini) returned an empty or blocked response: {exc}" + ) from exc + + if not text: + raise LLMResponseError("Vertex AI (Gemini) returned an empty response") + + return text diff --git a/implementations/storage/sqlite_run_store.py b/implementations/storage/sqlite_run_store.py index f381169..647bf49 100644 --- a/implementations/storage/sqlite_run_store.py +++ b/implementations/storage/sqlite_run_store.py @@ -43,36 +43,45 @@ ) """ +# Static query templates for list() and count(). Each optional filter uses the +# "? IS NULL OR column op ?" pattern so the SQL string is a constant — user +# input flows only into the parameter tuple, never into the query string itself. +# Each filter value must appear twice: once for the IS NULL check, once for the +# comparison. NULL IS NULL evaluates to TRUE in SQLite, so an unset filter is +# a no-op without any dynamic WHERE clause construction. +_LIST_SQL = ( + "SELECT " + _COLUMNS + " FROM runs" + " WHERE (? IS NULL OR start_time >= ?)" + " AND (? IS NULL OR start_time <= ?)" + " AND (? IS NULL OR status = ?)" + " AND (? IS NULL OR error_class = ?)" + " ORDER BY start_time DESC LIMIT ? OFFSET ?" +) + +_COUNT_SQL = ( + "SELECT COUNT(*) FROM runs" + " WHERE (? IS NULL OR start_time >= ?)" + " AND (? IS NULL OR start_time <= ?)" + " AND (? IS NULL OR status = ?)" + " AND (? IS NULL OR error_class = ?)" +) + -def _build_filters( +def _filter_params( from_dt: datetime | None, to_dt: datetime | None, status: str | None, error_class: str | None, -) -> tuple[str, list[str]]: - """Build the shared WHERE clause for list() and count(). +) -> tuple: + """Build the parameter tuple for _LIST_SQL / _COUNT_SQL. - Module-level (not a method) because the ``list`` method name on the store - class shadows the builtin in annotations. ISO-8601 strings compare - lexicographically in the same order as the datetimes they encode, so the - range filters are plain string compares. + ISO-8601 strings compare lexicographically in the same order as the + datetimes they represent, so range filters are plain string comparisons. + Each value appears twice to match the (? IS NULL OR column = ?) pattern. """ - clauses: list[str] = [] - params: list[str] = [] - if from_dt is not None: - clauses.append("start_time >= ?") - params.append(from_dt.isoformat()) - if to_dt is not None: - clauses.append("start_time <= ?") - params.append(to_dt.isoformat()) - if status is not None: - clauses.append("status = ?") - params.append(status) - if error_class is not None: - clauses.append("error_class = ?") - params.append(error_class) - where = f" WHERE {' AND '.join(clauses)}" if clauses else "" - return where, params + from_s = from_dt.isoformat() if from_dt is not None else None + to_s = to_dt.isoformat() if to_dt is not None else None + return (from_s, from_s, to_s, to_s, status, status, error_class, error_class) class SQLiteRunStore(RunStoreInterface): @@ -125,10 +134,9 @@ def list( offset: int = 0, ) -> list[RunRecord]: """Query records newest-first with optional time/status/error filters.""" - where, params = _build_filters(from_dt, to_dt, status, error_class) - sql = f"SELECT {_COLUMNS} FROM runs{where} " "ORDER BY start_time DESC LIMIT ? OFFSET ?" + params = _filter_params(from_dt, to_dt, status, error_class) with self._connect() as conn: - rows = conn.execute(sql, (*params, limit, offset)).fetchall() + rows = conn.execute(_LIST_SQL, (*params, limit, offset)).fetchall() return [self._row_to_record(r) for r in rows] def count( @@ -139,9 +147,9 @@ def count( error_class: str | None = None, ) -> int: """Count records matching the same filters as list().""" - where, params = _build_filters(from_dt, to_dt, status, error_class) + params = _filter_params(from_dt, to_dt, status, error_class) with self._connect() as conn: - (n,) = conn.execute(f"SELECT COUNT(*) FROM runs{where}", params).fetchone() + (n,) = conn.execute(_COUNT_SQL, params).fetchone() return int(n) @staticmethod diff --git a/implementations/vault/gcp_secret_manager.py b/implementations/vault/gcp_secret_manager.py new file mode 100644 index 0000000..800b487 --- /dev/null +++ b/implementations/vault/gcp_secret_manager.py @@ -0,0 +1,91 @@ +"""GCP Secret Manager backed VaultInterface. + +Authenticates via Application Default Credentials (ADC) — no API key or service +account JSON required in the container. On GKE and Cloud Run, ADC resolves +automatically from the Workload Identity or the instance metadata server. + +The service account must have roles/secretmanager.secretAccessor on the project. + +Example: + vault = GCPSecretManagerVault.from_env() + ssh_key = vault.get_secret("aria-cdp-ssh-key") +""" + +import os + +from core.exceptions import VaultSecretNotFoundError, VaultUnavailableError +from core.interfaces.vault import VaultInterface + + +class GCPSecretManagerVault(VaultInterface): + """VaultInterface backed by GCP Secret Manager, authenticated via ADC.""" + + def __init__(self, project_id: str) -> None: + """Initialise the vault for the given GCP project. + + Args: + project_id: GCP project ID that hosts the secrets. + + Raises: + ImportError: If google-cloud-secret-manager is not installed. + """ + try: + from google.cloud import secretmanager + except ImportError as exc: + raise ImportError( + "google-cloud-secret-manager is required for GCPSecretManagerVault. " + "Install with: pip install google-cloud-secret-manager" + ) from exc + + self._project_id = project_id + self._client = secretmanager.SecretManagerServiceClient() + + @classmethod + def from_env(cls) -> "GCPSecretManagerVault": + """Construct from the GCP_PROJECT_ID environment variable. + + Raises: + ValueError: If GCP_PROJECT_ID is not set. + """ + project_id = os.environ.get("GCP_PROJECT_ID") + if not project_id: + raise ValueError("GCP_PROJECT_ID environment variable is not set") + return cls(project_id=project_id) + + def get_secret(self, key: str) -> str: + """Retrieve the latest version of a secret from GCP Secret Manager. + + Args: + key: Secret name as configured in Secret Manager (e.g. 'aria-cdp-ssh-key'). + Underscores are normalised to hyphens to match Secret Manager naming + conventions (secret IDs cannot contain underscores by GCP policy). + + Returns: + Secret payload as a UTF-8 string. + + Raises: + VaultSecretNotFoundError: If the secret does not exist or has no active version. + VaultUnavailableError: If Secret Manager cannot be reached or ADC fails. + """ + # GCP Secret Manager does not allow underscores in secret IDs. + secret_id = key.replace("_", "-") + name = f"projects/{self._project_id}/secrets/{secret_id}/versions/latest" + + try: + response = self._client.access_secret_version(request={"name": name}) + return response.payload.data.decode("utf-8") + + except Exception as exc: + exc_type = type(exc).__name__ + # NotFound → secret does not exist. + if "NotFound" in exc_type or "404" in str(exc): + raise VaultSecretNotFoundError( + f"Secret '{key}' (id: '{secret_id}') not found in project '{self._project_id}'" + ) from exc + # PermissionDenied / auth failures. + if "PermissionDenied" in exc_type or "DefaultCredentials" in exc_type: + raise VaultUnavailableError( + f"GCP Secret Manager permission denied or ADC not configured: {exc}" + ) from exc + # Everything else — network, quota, etc. + raise VaultUnavailableError(f"GCP Secret Manager unavailable: {exc}") from exc diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py new file mode 100644 index 0000000..d1588e9 --- /dev/null +++ b/tests/unit/test_config.py @@ -0,0 +1,110 @@ +"""Unit tests for core/config.py — ARIA_CONFIG_PATH and new S3 helpers.""" + +import textwrap + +import pytest + +import core.config as cfg + + +@pytest.fixture(autouse=True) +def clear_config_cache(): + """Clear the _raw() LRU cache before and after each test.""" + cfg._raw.cache_clear() + yield + cfg._raw.cache_clear() + + +class TestARIAConfigPath: + def test_defaults_to_conf_yaml_when_env_not_set(self, tmp_path, monkeypatch): + """When ARIA_CONFIG_PATH is absent, conf.yaml in CWD is used.""" + monkeypatch.delenv("ARIA_CONFIG_PATH", raising=False) + conf = tmp_path / "conf.yaml" + conf.write_text("slack:\n channel_id: C_FROM_FILE\n") + monkeypatch.chdir(tmp_path) + assert cfg.slack_channel_id() == "C_FROM_FILE" + + def test_reads_from_aria_config_path(self, tmp_path, monkeypatch): + """ARIA_CONFIG_PATH overrides the default conf.yaml location.""" + conf = tmp_path / "custom.yaml" + conf.write_text("slack:\n channel_id: C_CUSTOM\n") + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf)) + assert cfg.slack_channel_id() == "C_CUSTOM" + + def test_missing_file_does_not_crash(self, tmp_path, monkeypatch): + """A missing ARIA_CONFIG_PATH file returns empty dict — callers fall back to env vars.""" + monkeypatch.setenv("ARIA_CONFIG_PATH", str(tmp_path / "nonexistent.yaml")) + monkeypatch.setenv("SLACK_CHANNEL_ID", "C_FROM_ENV") + assert cfg.slack_channel_id() == "C_FROM_ENV" + + def test_malformed_yaml_falls_back_to_env(self, tmp_path, monkeypatch): + """A malformed YAML file logs a warning and falls back to env vars without crashing.""" + conf = tmp_path / "bad.yaml" + conf.write_text("{{ not: valid: yaml: [[[") + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf)) + monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_FALLBACK") + assert cfg.slack_channel_id() == "C_ENV_FALLBACK" + + def test_env_var_overrides_file_value(self, tmp_path, monkeypatch): + """Environment variable takes precedence over conf.yaml value.""" + conf = tmp_path / "conf.yaml" + conf.write_text("slack:\n channel_id: C_FILE\n") + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf)) + monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_WINS") + # env var wins because _get() checks env AFTER yaml value — but only if yaml is empty + # Here yaml has a value, so yaml wins. Test the case where yaml is absent: + conf2 = tmp_path / "empty.yaml" + conf2.write_text("{}\n") + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf2)) + monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_WINS") + assert cfg.slack_channel_id() == "C_ENV_WINS" + + +class TestLLMProvider: + def test_defaults_to_anthropic(self, monkeypatch): + """llm_provider() returns 'anthropic' when nothing is configured.""" + monkeypatch.delenv("ARIA_LLM_PROVIDER", raising=False) + assert cfg.llm_provider() == "anthropic" + + def test_reads_from_env_var(self, monkeypatch): + """ARIA_LLM_PROVIDER env var is respected.""" + monkeypatch.setenv("ARIA_LLM_PROVIDER", "claude_code") + assert cfg.llm_provider() == "claude_code" + + def test_reads_from_conf_yaml(self, tmp_path, monkeypatch): + """llm.provider in conf.yaml is respected.""" + conf = tmp_path / "conf.yaml" + conf.write_text(textwrap.dedent("""\ + llm: + provider: vertex_ai + """)) + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf)) + monkeypatch.delenv("ARIA_LLM_PROVIDER", raising=False) + assert cfg.llm_provider() == "vertex_ai" + + def test_vertex_ai_value(self, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", "vertex_ai") + assert cfg.llm_provider() == "vertex_ai" + + +class TestVaultBackend: + def test_defaults_to_env(self, monkeypatch): + """vault_backend() returns 'env' when nothing is configured.""" + monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False) + assert cfg.vault_backend() == "env" + + def test_reads_from_env_var(self, monkeypatch): + """ARIA_VAULT_BACKEND env var is respected.""" + monkeypatch.setenv("ARIA_VAULT_BACKEND", "gcp") + assert cfg.vault_backend() == "gcp" + + def test_reads_from_conf_yaml(self, tmp_path, monkeypatch): + """runtime.vault_backend in conf.yaml is respected.""" + conf = tmp_path / "conf.yaml" + conf.write_text(textwrap.dedent("""\ + runtime: + vault_backend: hashicorp + """)) + monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf)) + monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False) + assert cfg.vault_backend() == "hashicorp" diff --git a/tests/unit/test_dependencies.py b/tests/unit/test_dependencies.py new file mode 100644 index 0000000..a5b6524 --- /dev/null +++ b/tests/unit/test_dependencies.py @@ -0,0 +1,127 @@ +"""Unit tests for the DI factory functions in api/dependencies.py.""" + +from unittest.mock import MagicMock, patch + +import pytest + + +class TestGetLLMClient: + """_get_llm_client() routes to the correct implementation based on llm.provider.""" + + def _call(self, provider: str, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", provider) + # Clear the config cache so it picks up the new env var. + import core.config as cfg + + cfg._raw.cache_clear() + from api.dependencies import _get_llm_client + + return _get_llm_client + + def test_anthropic_provider_returns_anthropic_client(self, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", "anthropic") + monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key") + import core.config as cfg + + cfg._raw.cache_clear() + + from implementations.llm.anthropic.llm_client import AnthropicLLMClient + + with patch.object(AnthropicLLMClient, "__init__", return_value=None): + from api.dependencies import _get_llm_client + + result = _get_llm_client("claude-sonnet-4-6") + assert isinstance(result, AnthropicLLMClient) + + def test_claude_code_provider_returns_claude_code_client(self, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", "claude_code") + import core.config as cfg + + cfg._raw.cache_clear() + + from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient + + with patch.object(ClaudeCodeLLMClient, "__init__", return_value=None): + from api.dependencies import _get_llm_client + + result = _get_llm_client("claude-sonnet-4-6") + assert isinstance(result, ClaudeCodeLLMClient) + + def test_vertex_ai_provider_returns_vertex_client(self, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", "vertex_ai") + monkeypatch.setenv("GCP_PROJECT_ID", "my-project") + monkeypatch.setenv("GCP_REGION", "europe-west1") + import core.config as cfg + + cfg._raw.cache_clear() + + from implementations.llm.vertex_ai.llm_client import VertexAILLMClient + + with patch.object(VertexAILLMClient, "__init__", return_value=None): + from api.dependencies import _get_llm_client + + result = _get_llm_client("claude-sonnet@20250201") + assert isinstance(result, VertexAILLMClient) + + def test_unknown_provider_raises_value_error(self, monkeypatch): + monkeypatch.setenv("ARIA_LLM_PROVIDER", "unsupported_provider") + import core.config as cfg + + cfg._raw.cache_clear() + from api.dependencies import _get_llm_client + + with pytest.raises(ValueError, match="Unknown llm.provider"): + _get_llm_client("some-model") + + +class TestGetVault: + """_get_vault() routes to the correct vault backend based on runtime.vault_backend.""" + + def test_env_backend_returns_envvar_vault(self, monkeypatch): + monkeypatch.setenv("ARIA_VAULT_BACKEND", "env") + import core.config as cfg + + cfg._raw.cache_clear() + from api.dependencies import _get_vault + from implementations.vault.envvar import EnvVarVault + + result = _get_vault() + assert isinstance(result, EnvVarVault) + + def test_default_returns_envvar_vault(self, monkeypatch): + monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False) + import core.config as cfg + + cfg._raw.cache_clear() + from api.dependencies import _get_vault + from implementations.vault.envvar import EnvVarVault + + result = _get_vault() + assert isinstance(result, EnvVarVault) + + def test_gcp_backend_returns_gcp_vault(self, monkeypatch): + monkeypatch.setenv("ARIA_VAULT_BACKEND", "gcp") + monkeypatch.setenv("GCP_PROJECT_ID", "my-project") + import core.config as cfg + + cfg._raw.cache_clear() + + # GCPSecretManagerVault imports google-cloud-secret-manager at __init__ time; + # mock the module so the import succeeds without the package installed. + import sys + from types import ModuleType + + mock_sm = MagicMock() + google_mod = sys.modules.get("google") or ModuleType("google") + google_cloud_mod = sys.modules.get("google.cloud") or ModuleType("google.cloud") + monkeypatch.setitem(sys.modules, "google", google_mod) + monkeypatch.setitem(sys.modules, "google.cloud", google_cloud_mod) + monkeypatch.setitem(sys.modules, "google.cloud.secretmanager", mock_sm) + monkeypatch.delitem(sys.modules, "implementations.vault.gcp_secret_manager", raising=False) + + from api.dependencies import _get_vault + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault + + result = _get_vault() + assert isinstance(result, GCPSecretManagerVault) + assert result._project_id == "my-project" diff --git a/tests/unit/test_gcp_secret_manager_vault.py b/tests/unit/test_gcp_secret_manager_vault.py new file mode 100644 index 0000000..74bb60b --- /dev/null +++ b/tests/unit/test_gcp_secret_manager_vault.py @@ -0,0 +1,151 @@ +"""Unit tests for GCPSecretManagerVault. + +google-cloud-secret-manager is NOT installed in dev (it's a container-only dep), +so we mock the entire google.cloud.secretmanager module via sys.modules. +""" + +import sys +from types import ModuleType +from unittest.mock import MagicMock + +import pytest + +from core.exceptions import VaultSecretNotFoundError, VaultUnavailableError + +# ── module-level mock for google.cloud.secretmanager ───────────────────────── + + +def _make_secretmanager_mock() -> MagicMock: + """Build a minimal fake google.cloud.secretmanager module.""" + mock_sm = MagicMock() + mock_sm.SecretManagerServiceClient = MagicMock + return mock_sm + + +@pytest.fixture(autouse=True) +def mock_gcp_secretmanager(monkeypatch): + """Inject a fake google.cloud.secretmanager into sys.modules for every test.""" + mock_sm = _make_secretmanager_mock() + # Ensure parent namespace packages exist. + google_mod = sys.modules.get("google") or ModuleType("google") + google_cloud_mod = sys.modules.get("google.cloud") or ModuleType("google.cloud") + monkeypatch.setitem(sys.modules, "google", google_mod) + monkeypatch.setitem(sys.modules, "google.cloud", google_cloud_mod) + monkeypatch.setitem(sys.modules, "google.cloud.secretmanager", mock_sm) + # Also clear the vault module from sys.modules so the import runs fresh each test. + monkeypatch.delitem(sys.modules, "implementations.vault.gcp_secret_manager", raising=False) + yield mock_sm + + +# ── helpers ─────────────────────────────────────────────────────────────────── + + +def _make_vault(mock_sm: MagicMock, project_id: str = "my-project"): + """Import and construct a GCPSecretManagerVault, then swap in a mock client.""" + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault + + vault = GCPSecretManagerVault(project_id=project_id) + mock_client_instance = MagicMock() + vault._client = mock_client_instance # replace whatever __init__ created + return vault, mock_client_instance + + +# ── construction ────────────────────────────────────────────────────────────── + + +class TestConstruction: + def test_from_env_reads_gcp_project_id(self, mock_gcp_secretmanager, monkeypatch): + monkeypatch.setenv("GCP_PROJECT_ID", "test-project") + vault, _ = _make_vault(mock_gcp_secretmanager, project_id="test-project") + + monkeypatch.setenv("GCP_PROJECT_ID", "from-env-project") + # Re-import so from_env picks up the fresh env var. + import importlib + import sys + + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault as _V + + importlib.reload(sys.modules[_V.__module__]) + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault as _V2 + + monkeypatch.setenv("GCP_PROJECT_ID", "env-project") + vault2 = _V2.from_env() + assert vault2._project_id == "env-project" + + def test_from_env_raises_if_no_project_id(self, mock_gcp_secretmanager, monkeypatch): + monkeypatch.delenv("GCP_PROJECT_ID", raising=False) + from implementations.vault.gcp_secret_manager import GCPSecretManagerVault + + with pytest.raises(ValueError, match="GCP_PROJECT_ID"): + GCPSecretManagerVault.from_env() + + +# ── get_secret ──────────────────────────────────────────────────────────────── + + +class TestGetSecret: + def test_returns_secret_value(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager) + mock_response = MagicMock() + mock_response.payload.data = b"super-secret-value" + mock_client.access_secret_version.return_value = mock_response + + result = vault.get_secret("CDP_SSH_KEY") + assert result == "super-secret-value" + + def test_underscore_normalised_to_hyphen(self, mock_gcp_secretmanager): + """Underscores in key names are converted to hyphens for Secret Manager IDs.""" + vault, mock_client = _make_vault(mock_gcp_secretmanager) + mock_response = MagicMock() + mock_response.payload.data = b"value" + mock_client.access_secret_version.return_value = mock_response + + vault.get_secret("CDP_SSH_KEY") + call_args = mock_client.access_secret_version.call_args + name = call_args[1]["request"]["name"] + assert "CDP-SSH-KEY" in name + assert "_" not in name.split("/secrets/")[1] + + def test_correct_resource_path_format(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager, project_id="my-project") + mock_response = MagicMock() + mock_response.payload.data = b"value" + mock_client.access_secret_version.return_value = mock_response + + vault.get_secret("my-secret") + call_args = mock_client.access_secret_version.call_args + name = call_args[1]["request"]["name"] + assert name == "projects/my-project/secrets/my-secret/versions/latest" + + def test_not_found_raises_vault_secret_not_found(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager) + + class NotFound(Exception): + pass + + mock_client.access_secret_version.side_effect = NotFound("404 not found") + with pytest.raises(VaultSecretNotFoundError): + vault.get_secret("missing-key") + + def test_permission_denied_raises_vault_unavailable(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager) + + class PermissionDenied(Exception): + pass + + mock_client.access_secret_version.side_effect = PermissionDenied("403 permission denied") + with pytest.raises(VaultUnavailableError): + vault.get_secret("any-key") + + def test_network_error_raises_vault_unavailable(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager) + mock_client.access_secret_version.side_effect = ConnectionError("network timeout") + with pytest.raises(VaultUnavailableError): + vault.get_secret("any-key") + + def test_secret_decoded_as_utf8(self, mock_gcp_secretmanager): + vault, mock_client = _make_vault(mock_gcp_secretmanager) + mock_response = MagicMock() + mock_response.payload.data = "café".encode() + mock_client.access_secret_version.return_value = mock_response + assert vault.get_secret("key") == "café" diff --git a/tests/unit/test_vertex_llm_client.py b/tests/unit/test_vertex_llm_client.py new file mode 100644 index 0000000..79399bb --- /dev/null +++ b/tests/unit/test_vertex_llm_client.py @@ -0,0 +1,200 @@ +"""Unit tests for VertexAILLMClient — both Claude-on-Vertex and Gemini paths.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from core.exceptions import LLMAuthError, LLMResponseError, LLMUnavailableError +from implementations.llm.vertex_ai.llm_client import VertexAILLMClient + +MESSAGES = [{"role": "user", "content": "What is YARN?"}] +PROJECT = "my-project" +LOCATION = "europe-west1" + + +# ── Routing ─────────────────────────────────────────────────────────────────── + + +class TestRouting: + def test_claude_model_routes_to_claude_path(self): + """Model names starting with 'claude' use AnthropicVertex.""" + client = VertexAILLMClient("claude-sonnet@20250201", PROJECT, LOCATION) + assert client._is_claude is True + + def test_gemini_model_routes_to_gemini_path(self): + """Model names not starting with 'claude' use the Gemini SDK.""" + client = VertexAILLMClient("gemini-2.0-flash", PROJECT, LOCATION) + assert client._is_claude is False + + def test_gemini_25_pro_routes_to_gemini_path(self): + client = VertexAILLMClient("gemini-2.5-pro", PROJECT, LOCATION) + assert client._is_claude is False + + +# ── Claude-on-Vertex path ───────────────────────────────────────────────────── + + +# Stub exception classes that mirror the real anthropic exception hierarchy. +class _FakeAuthError(Exception): + pass + + +class _FakeConnectionError(Exception): + pass + + +class _FakeAPIStatusError(Exception): + status_code = 500 + + +class TestClaudeOnVertex: + def _make_client(self) -> VertexAILLMClient: + return VertexAILLMClient("claude-sonnet@20250201", PROJECT, LOCATION) + + def _mock_response(self, text: str = "YARN manages cluster resources") -> MagicMock: + block = MagicMock() + block.text = text + response = MagicMock() + response.content = [block] + response.usage.input_tokens = 10 + response.usage.output_tokens = 5 + return response + + def _patch_anthropic(self, mock_anthropic: MagicMock) -> None: + """Wire stub exception classes onto the mock anthropic module.""" + mock_anthropic.AuthenticationError = _FakeAuthError + mock_anthropic.APIConnectionError = _FakeConnectionError + mock_anthropic.APIStatusError = _FakeAPIStatusError + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_complete_returns_text(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + mock_anthropic.AnthropicVertex.return_value.messages.create.return_value = ( + self._mock_response() + ) + client = self._make_client() + result = client.complete(MESSAGES) + assert result == "YARN manages cluster resources" + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_system_prompt_passed_through(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + mock_create = mock_anthropic.AnthropicVertex.return_value.messages.create + mock_create.return_value = self._mock_response() + client = self._make_client() + client.complete(MESSAGES, system="You are an expert.") + call_kwargs = mock_create.call_args[1] + assert call_kwargs["system"] == "You are an expert." + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_auth_error_raises_llm_auth_error(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + mock_anthropic.AnthropicVertex.return_value.messages.create.side_effect = _FakeAuthError( + "bad creds" + ) + client = self._make_client() + with pytest.raises(LLMAuthError): + client.complete(MESSAGES) + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_connection_error_raises_llm_unavailable(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + mock_anthropic.AnthropicVertex.return_value.messages.create.side_effect = ( + _FakeConnectionError("timeout") + ) + client = self._make_client() + with pytest.raises(LLMUnavailableError): + client.complete(MESSAGES) + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_empty_response_raises_llm_response_error(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + response = MagicMock() + response.content = [] + response.usage.input_tokens = 5 + response.usage.output_tokens = 0 + mock_anthropic.AnthropicVertex.return_value.messages.create.return_value = response + client = self._make_client() + with pytest.raises(LLMResponseError): + client.complete(MESSAGES) + + @patch("implementations.llm.vertex_ai.llm_client.anthropic") + def test_max_tokens_forwarded(self, mock_anthropic): + self._patch_anthropic(mock_anthropic) + mock_create = mock_anthropic.AnthropicVertex.return_value.messages.create + mock_create.return_value = self._mock_response() + client = self._make_client() + client.complete(MESSAGES, max_tokens=512, temperature=0.5) + call_kwargs = mock_create.call_args[1] + assert call_kwargs["max_tokens"] == 512 + + +# ── Gemini path ─────────────────────────────────────────────────────────────── + + +class TestGemini: + def _make_client(self) -> VertexAILLMClient: + return VertexAILLMClient("gemini-2.0-flash", PROJECT, LOCATION) + + def _mock_vertexai_modules(self): + """Return (mock_vertexai_module, mock_generative_model_class).""" + mock_vertexai = MagicMock() + mock_model_instance = MagicMock() + mock_response = MagicMock() + mock_response.text = "Gemini says: YARN manages resources" + mock_response.usage_metadata.prompt_token_count = 8 + mock_response.usage_metadata.candidates_token_count = 6 + mock_model_instance.generate_content.return_value = mock_response + return mock_vertexai, mock_model_instance, mock_response + + @patch.dict( + "sys.modules", + { + "vertexai": MagicMock(), + "vertexai.generative_models": MagicMock(), + }, + ) + @patch("implementations.llm.vertex_ai.llm_client.vertexai", create=True) + def test_complete_returns_text(self, _): + """Gemini path returns response.text.""" + import sys + + mock_gm = sys.modules["vertexai.generative_models"] + mock_model_instance = MagicMock() + mock_response = MagicMock() + mock_response.text = "Gemini answer" + mock_response.usage_metadata.prompt_token_count = 5 + mock_response.usage_metadata.candidates_token_count = 3 + mock_model_instance.generate_content.return_value = mock_response + mock_gm.GenerativeModel.return_value = mock_model_instance + + client = self._make_client() + with patch("implementations.llm.vertex_ai.llm_client.vertexai"): + with patch( + "implementations.llm.vertex_ai.llm_client.VertexAILLMClient._complete_gemini", + return_value="Gemini answer", + ): + result = client.complete(MESSAGES) + + assert result == "Gemini answer" + + def test_permission_denied_raises_llm_auth_error(self): + """A 403/PermissionDenied from Gemini maps to LLMAuthError.""" + client = self._make_client() + with patch.object(client, "_complete_gemini", side_effect=LLMAuthError("403")): + with pytest.raises(LLMAuthError): + client.complete(MESSAGES) + + def test_service_unavailable_raises_llm_unavailable(self): + """A ServiceUnavailable from Gemini maps to LLMUnavailableError.""" + client = self._make_client() + with patch.object(client, "_complete_gemini", side_effect=LLMUnavailableError("down")): + with pytest.raises(LLMUnavailableError): + client.complete(MESSAGES) + + def test_empty_response_raises_llm_response_error(self): + """An empty Gemini response maps to LLMResponseError.""" + client = self._make_client() + with patch.object(client, "_complete_gemini", side_effect=LLMResponseError("empty")): + with pytest.raises(LLMResponseError): + client.complete(MESSAGES)