diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..9b80273
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,35 @@
+# Version control
+.git/
+.gitignore
+
+# Python artifacts
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.pytest_cache/
+*.egg-info/
+dist/
+build/
+.mypy_cache/
+.ruff_cache/
+
+# Tests and fixtures — not needed in the image
+tests/
+
+# Dev and internal docs
+.docs/
+.claude/
+
+# Infrastructure-as-code — not part of the application
+infra/
+
+# Runtime data — always mounted at runtime, never baked in
+logs/
+data/
+conf.yaml
+
+# Secrets — never in the image
+.env
+.env.*
diff --git a/.env.example b/.env.example
index c9f2219..c674343 100644
--- a/.env.example
+++ b/.env.example
@@ -1,45 +1,97 @@
 # ──────────────────────────────────────────────────────────────────────────────
-# ARIA — Secrets template
+# ARIA — Secrets and runtime env vars template
 # Copy this file to .env and fill in your values. Never commit .env to Git.
 # Non-secret configuration (model IDs, connector types, GCP settings, etc.)
-# lives in conf.yaml — see conf_template.yaml.
+# lives in conf.yaml — see conf_template.yaml for all options.
 # ──────────────────────────────────────────────────────────────────────────────
 
+
+# ── Required (all deployments) ────────────────────────────────────────────────
+
 # ServiceNow — password for the service account defined in conf.yaml (servicenow.user)
 SNOW_PASSWORD=<your-servicenow-password>
 
-# Anthropic — API key for LLM calls across all agents
-# Reference implementation uses Anthropic. Swap for your provider if you bring
-# your own LLMClientInterface implementation.
+# Slack — bot token with chat:write scope (channel set in conf.yaml slack.channel_id)
+SLACK_BOT_TOKEN=<your-slack-bot-token>
+
+
+# ── LLM provider — set ONE block depending on llm.provider in conf.yaml ───────
+
+# --- anthropic (llm.provider: anthropic) — recommended for non-GCP deployments
 ANTHROPIC_API_KEY=<your-anthropic-api-key>
 
-# Slack — bot token with chat:write scope, for the channel defined in conf.yaml (slack.channel_id)
-SLACK_BOT_TOKEN=<your-slack-bot-token>
+# --- vertex_ai (llm.provider: vertex_ai) — GCP container deployments (no API key needed)
+# Auth is via ADC — set GOOGLE_APPLICATION_CREDENTIALS if not running on GKE/Cloud Run.
+# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json  # only if ADC is not auto-resolved
+VERTEX_AI_PROJECT_ID=<your-gcp-project-id>
+VERTEX_AI_LOCATION=europe-west1
 
-# MS Teams (optional — alternative notifier; swap connector in dependencies.py)
-TEAMS_WEBHOOK_URL=<your-teams-incoming-webhook-url>
+# --- claude_code (llm.provider: claude_code) — local dev only, NOT for production (#84)
+# No additional env vars needed; uses the local Claude Code CLI subscription.
+
+
+# ── Vault backend — set ONE block depending on runtime.vault_backend in conf.yaml ─
+
+# --- env (default) — secrets come from this .env file; no additional config needed.
+
+# --- gcp — GCP Secret Manager via ADC
+GCP_PROJECT_ID=<your-gcp-project-id>
+
+# --- hashicorp — HashiCorp Vault
+VAULT_TOKEN=<your-vault-token>
+
+# --- aws — AWS Secrets Manager
+AWS_ACCESS_KEY_ID=<your-aws-access-key-id>
+AWS_SECRET_ACCESS_KEY=<your-aws-secret-access-key>
 
-# Google Chat (optional — alternative notifier; swap connector in dependencies.py)
-GOOGLE_CHAT_WEBHOOK_URL=<your-google-chat-webhook-url>
+
+# ── Pipeline behaviour ────────────────────────────────────────────────────────
+
+# Path to a custom conf.yaml (default: ./conf.yaml relative to working directory)
+# ARIA_CONFIG_PATH=/etc/aria/conf.yaml
+
+# Enable the built-in Alpine.js ops dashboard at /dashboard
+# ARIA_DASHBOARD_ENABLED=true
+
+# Operating mode: inform | hitm | autonomous (only 'inform' is implemented in Phase 1.5)
+# ARIA_OPERATING_MODE=inform
+
+# LLM provider override — overrides llm.provider in conf.yaml
+# ARIA_LLM_PROVIDER=anthropic
+
+# Vault backend override — overrides runtime.vault_backend in conf.yaml
+# ARIA_VAULT_BACKEND=env
+
+# Log format: human (coloured, for terminals) | json (for log aggregators)
+# ARIA_LOG_FORMAT=human
+
+# Log directory for rolling file output
+# ARIA_LOG_DIR=logs/
+
+# SQLite run history database path
+# ARIA_RUN_DB_PATH=data/runs.db
+
+# Dry-run mode — uses in-memory stubs; no real ServiceNow/Slack/SSH calls
+# ARIA_DRY_RUN=false
+
+
+# ── Optional connectors ───────────────────────────────────────────────────────
 
 # CDP — SSH private key PEM content for Agent 2 log extraction from Cloudera CDP nodes
 # Set via: export CDP_SSH_KEY="$(cat /path/to/private_key)"
 CDP_SSH_KEY=<pem-content-of-ssh-private-key>
 
-# CDP — SSH host public key for strict host verification (recommended, prevents MITM attacks)
+# CDP — SSH host public key for strict host verification (prevents MITM attacks)
 # Format: "<key-type> <base64-encoded-public-key>" e.g. "ssh-ed25519 AAAA..."
-# If not set, ARIA falls back to WarningPolicy (logs a warning but still connects)
+# Leave empty to use WarningPolicy (logs a warning but still connects)
 CDP_HOST_KEY=
 
-# GCP — service account JSON key (base64-encoded) for BigQuery and GCS access
-# Only required when connectors.log = gcp in conf.yaml
+# GCP — service account JSON key (base64-encoded) for Cloud Logging / BigQuery access
+# Only needed if NOT using ADC (e.g. running outside GCP with a SA key file)
 GCP_SA_KEY=<base64-encoded-service-account-json>
 
-# AWS — credentials for the AWS Secrets Manager vault implementation
-# Only required if you are using the AWS SM vault backend
-AWS_ACCESS_KEY_ID=<your-aws-access-key-id>
-AWS_SECRET_ACCESS_KEY=<your-aws-secret-access-key>
+# MS Teams (optional alternative notifier — swap connector in dependencies.py)
+TEAMS_WEBHOOK_URL=<your-teams-incoming-webhook-url>
 
-# HashiCorp Vault — token for the Vault vault implementation
-# Only required if you are using the Vault backend
-VAULT_TOKEN=<your-vault-token>
+# Google Chat (optional alternative notifier — swap connector in dependencies.py)
+GOOGLE_CHAT_WEBHOOK_URL=<your-google-chat-webhook-url>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 25ae23c..1fcf98b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,3 +52,31 @@ jobs:
 
       - name: pytest (unit)
         run: pytest tests/unit/ -v
+
+  docker-smoke:
+    name: Docker build + smoke test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build image
+        run: docker build -t aria:ci .
+
+      - name: Smoke test
+        run: |
+          docker run -d --name aria-ci \
+            -e ARIA_DRY_RUN=true \
+            -e ARIA_LLM_PROVIDER=anthropic \
+            -e ANTHROPIC_API_KEY=dummy \
+            -p 8000:8000 aria:ci
+          # Wait for the API to boot (health check starts after start_period).
+          for i in $(seq 1 15); do
+            if curl -sf http://localhost:8000/api/v1/health; then
+              echo "Health check passed"
+              break
+            fi
+            echo "Waiting... ($i/15)"
+            sleep 2
+          done
+          curl -sf http://localhost:8000/api/v1/health
+          docker stop aria-ci
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a9e49c7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.11-slim
+
+# curl is needed for the HEALTHCHECK command below.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Create a non-root user before copying any files.
+RUN adduser --disabled-password --uid 1000 aria
+
+# Install dependencies first so this layer is cached when only source changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application source.
+COPY . .
+
+RUN chown -R aria:aria /app
+USER aria
+
+EXPOSE 8000
+
+# Health check hits the /health endpoint — fails fast if the API is down.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1
+
+CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index c1ed4c8..e46a5d9 100644
--- a/README.md
+++ b/README.md
@@ -549,6 +549,51 @@ uvicorn api.main:app --reload
 
 ---
 
+## Deployment
+
+ARIA ships as a single Docker image. No Python installation is required on the target machine — only Docker (local/VM) or a Kubernetes cluster (production). The same image works across all environments; what changes is how `conf.yaml` and secrets are injected.
+
+### Docker (local machine or VM)
+
+```bash
+# 1. Build
+docker build -t aria:latest .
+
+# 2. Run — mount your conf.yaml; pass secrets as env vars
+docker run -d \
+  --name aria \
+  -p 8000:8000 \
+  -v /path/to/conf.yaml:/etc/aria/conf.yaml:ro \
+  -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \
+  -e SNOW_PASSWORD=<your-password> \
+  -e ANTHROPIC_API_KEY=<your-key> \
+  -e SLACK_BOT_TOKEN=<your-token> \
+  aria:latest
+
+# 3. Verify
+curl http://localhost:8000/api/v1/health
+```
+
+### Kubernetes
+
+`conf.yaml` is delivered via a ConfigMap; secrets via a Kubernetes Secret or GCP Secret Manager (Workload Identity, no API key in the pod):
+
+```bash
+kubectl create namespace aria
+kubectl create configmap aria-config --from-file=conf.yaml=./conf.yaml -n aria
+kubectl create secret generic aria-secrets \
+  --from-literal=SNOW_PASSWORD=<pw> \
+  --from-literal=ANTHROPIC_API_KEY=<key> \
+  --from-literal=SLACK_BOT_TOKEN=<token> \
+  -n aria
+```
+
+Then apply a Deployment that mounts the ConfigMap at `/etc/aria/conf.yaml` and sets `ARIA_CONFIG_PATH=/etc/aria/conf.yaml`. For GCP clusters, set `llm.provider: vertex_ai` and `runtime.vault_backend: gcp` in `conf.yaml` — the pod authenticates via Workload Identity with no credentials in the container.
+
+**Full guide** (conf.yaml preparation, docker-compose, GKE Deployment + Service YAML, LLM provider selection, vault backend options): [documentation/guides/installation.md](documentation/guides/installation.md)
+
+---
+
 ## Acceptance criteria (Phase 1)
 
 Phase 1 is complete when all of the following pass on 10 consecutive test incidents:
@@ -580,8 +625,8 @@ Phase 1 is complete when all of the following pass on 10 consecutive test incide
 | Phase 1 | S8: ReAct loop trigger — cross-service log requests | ✅ Done |
 | Phase 1 | M7: Acceptance criteria validated on local environment | ✅ Done |
 | Phase 1.5 | S1: Structured logging — structlog, `run_id`, lifecycle events, RunRecord | ✅ Done |
-| **Phase 1.5** | **S2: Monitoring foundation — run store, REST API, Alpine.js dashboard, mode scaffold** | 🔜 Next |
-| Phase 1.5 | S3: Docker + `ARIA_CONFIG_PATH` + `VertexAILLMClient` + LLM provider DI | 🔜 Planned |
+| Phase 1.5 | S2: Monitoring foundation — run store, REST API, Alpine.js dashboard, mode scaffold | ✅ Done |
+| Phase 1.5 | S3: Docker + `ARIA_CONFIG_PATH` + `VertexAILLMClient` + LLM provider DI (incl. #84 security fix) | ✅ Done |
 | Phase 1.5 | S4: Testing infrastructure — UC1/UC2/UC3 cluster wiring, KB runbooks, CMDB validation | 🔜 Planned |
 | Phase 1.5 | S5: Round 2 acceptance testing — 30 incidents on UC1 + UC2 real infrastructure | 🔜 Planned |
 | Phase 1.5 | S6: GCP native connectors — BQ, Cloud Functions, Pub/Sub, GCS | 🔜 Planned |
diff --git a/api/dependencies.py b/api/dependencies.py
index 97a4eec..12ce1fd 100644
--- a/api/dependencies.py
+++ b/api/dependencies.py
@@ -15,14 +15,15 @@
 from core.agents.incident_reader import IncidentReaderAgent
 from core.agents.log_extractor import LogExtractorAgent
 from core.agents.notifier import NotifierAgent
+from core.interfaces.llm_client import LLMClientInterface
 from core.interfaces.run_state_store import RunStateStoreInterface
 from core.interfaces.run_store import RunStoreInterface
+from core.interfaces.vault import VaultInterface
 from core.models import PlatformTag
 from core.orchestrator.pipeline import ARIAPipeline
 from implementations.clusters.cloud.gcp.log_connector import GCPLogConnector
 from implementations.clusters.onprem.log_connector import SSHLogConnector
 from implementations.itsm.servicenow.connector import ServiceNowConnector
-from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient as LLMClient
 from implementations.storage.memory_run_state_store import InMemoryRunStateStore
 from implementations.storage.sqlite_run_store import SQLiteRunStore
 from implementations.vault.envvar import EnvVarVault
@@ -33,6 +34,51 @@ def _resolve_model(agent_num: str) -> str | None:
     return cfg.resolve_model(agent_num)
 
 
+def _get_llm_client(model: str) -> LLMClientInterface:
+    """Instantiate the LLM client for the configured provider.
+
+    Provider is read from llm.provider in conf.yaml / ARIA_LLM_PROVIDER env var.
+    Defaults to 'anthropic' — direct API, no local tool access (#84 security fix).
+    'claude_code' is available but must be explicitly opted into in conf.yaml.
+
+    Raises:
+        ValueError: If the provider name is not recognised.
+    """
+    provider = cfg.llm_provider()
+    if provider == "anthropic":
+        from implementations.llm.anthropic.llm_client import AnthropicLLMClient
+
+        return AnthropicLLMClient(model=model)
+    if provider == "claude_code":
+        from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient
+
+        return ClaudeCodeLLMClient(model=model)
+    if provider == "vertex_ai":
+        from implementations.llm.vertex_ai.llm_client import VertexAILLMClient
+
+        project_id = cfg.gcp_project_id()
+        location = cfg.gcp_region()
+        return VertexAILLMClient(model=model, project_id=project_id, location=location)
+    raise ValueError(
+        f"Unknown llm.provider: '{provider}'. Must be one of: anthropic | claude_code | vertex_ai"
+    )
+
+
+def _get_vault() -> VaultInterface:
+    """Instantiate the vault backend for the configured secret store.
+
+    Backend is read from runtime.vault_backend in conf.yaml / ARIA_VAULT_BACKEND env var.
+    Defaults to 'env' (EnvVarVault) — reads secrets from environment variables.
+    """
+    backend = cfg.vault_backend()
+    if backend == "gcp":
+        from implementations.vault.gcp_secret_manager import GCPSecretManagerVault
+
+        return GCPSecretManagerVault.from_env()
+    # hashicorp, aws, azure already have implementations — wire them here as they get used.
+    return EnvVarVault()
+
+
 @lru_cache(maxsize=1)
 def get_run_store() -> RunStoreInterface:
     """Build and cache the after-action run history store (P1.5 S2).
@@ -57,7 +103,7 @@ def get_run_state_store() -> RunStateStoreInterface:
 def get_agent1() -> IncidentReaderAgent:
     """Build and cache the Agent 1 (Incident Reader) instance.
 
-    Injects ServiceNow connector, Claude Code LLM client, and optionally the
+    Injects ServiceNow connector, the configured LLM client, and optionally the
     CMDBResolver when SNOW credentials are present. CMDBResolver absence is
     non-fatal — Agent 1 falls back to Path 3 (LLM-only) for CI resolution.
 
@@ -70,7 +116,7 @@ def get_agent1() -> IncidentReaderAgent:
         raise ValueError(
             "ARIA_AGENT1_MODEL env var is not set (or ARIA_GLOBAL_MODEL when ARIA_LLM_MODE=global)"
         )
-    llm = LLMClient(model=model)
+    llm = _get_llm_client(model)
 
     # Inject CMDBResolver when SNOW vars are present — enables Path 1 and Path 2
     # CI resolution. Without it, every incident falls through to Path 3 (LLM).
@@ -98,7 +144,7 @@ def get_agent3() -> ClassifierAgent:
             "ARIA_AGENT3_MODEL env var is not set "
             "(or ARIA_GLOBAL_MODEL when ARIA_LLM_MODE=global)"
         )
-    return ClassifierAgent(llm_client=LLMClient(model=model))
+    return ClassifierAgent(llm_client=_get_llm_client(model))
 
 
 @lru_cache(maxsize=1)
@@ -117,7 +163,7 @@ def get_agent4() -> NotifierAgent:
     llm = None
     model = cfg.llm_agent_model("4")
     if model:
-        llm = LLMClient(model=model)
+        llm = _get_llm_client(model)
 
     return NotifierAgent(
         communicator=SlackConnector(token=token, channel_id=channel),
@@ -148,7 +194,7 @@ def get_pipeline() -> "ARIAPipeline":
             )
         agent1 = IncidentReaderAgent(
             connector=InMemoryConnector(fixture_path=Path("tests/fixtures/sample_incidents.json")),
-            llm_client=LLMClient(model=model1),
+            llm_client=_get_llm_client(model1),
         )
         agent2 = LogExtractorAgent(
             connector_registry={
@@ -163,7 +209,7 @@ def get_pipeline() -> "ARIAPipeline":
         agent1 = get_agent1()
         agent2 = get_agent2()
         model3 = _resolve_model("3")
-        agent3 = ClassifierAgent(llm_client=LLMClient(model=model3) if model3 else None)
+        agent3 = ClassifierAgent(llm_client=_get_llm_client(model3) if model3 else None)
         agent4 = get_agent4()
 
     # Monitoring stores (P1.5 S2): the API pipeline always records run history
@@ -187,7 +233,7 @@ def get_agent2() -> LogExtractorAgent:
     and return empty results gracefully if credentials are absent.
     Injects an LLM client for query planning if ARIA_AGENT2_MODEL is set.
     """
-    vault = EnvVarVault()
+    vault = _get_vault()
     # Both connectors call vault.get_secret() only at query time — construction
     # never fails. Missing credentials surface as graceful empty results at
     # request time rather than crashing the API on startup.
@@ -204,5 +250,5 @@ def get_agent2() -> LogExtractorAgent:
     llm = None
     model = _resolve_model("2")
     if model:
-        llm = LLMClient(model=model)
+        llm = _get_llm_client(model)
     return LogExtractorAgent(connector_registry=registry, llm_client=llm)
diff --git a/conf_template.yaml b/conf_template.yaml
index 9aaa9a0..fc8b2f3 100644
--- a/conf_template.yaml
+++ b/conf_template.yaml
@@ -14,6 +14,12 @@ runtime:
   operating_mode: inform    # inform | hitm | autonomous — only 'inform' is implemented
                             # in Phase 1.5; the others raise NotImplementedError until
                             # their phase ships (hitm: Phase 2, autonomous: Phase 3)
+  vault_backend: env        # env | gcp | hashicorp | aws | azure
+                            # 'env' reads secrets from environment variables (local dev)
+                            # 'gcp' uses GCP Secret Manager via ADC (container deployments)
+  log_format: human         # human | json — human is coloured structlog for terminals;
+                            # json is machine-readable for log aggregators (Splunk, ELK)
+  log_dir: logs/            # directory for rolling log files (overridden by ARIA_LOG_DIR)
 
 runs:
   db_path: data/runs.db     # SQLite file for run history (monitoring API + dashboard)
@@ -25,6 +31,10 @@ servicenow:
   cmdb_rel_type: "Members::Member of"         # override if your instance uses a different type
 
 llm:
+  provider: anthropic       # anthropic | claude_code | vertex_ai
+                            # anthropic:   direct Anthropic API — needs ANTHROPIC_API_KEY
+                            # claude_code: local Claude Code CLI — local dev only (security risk in prod, #84)
+                            # vertex_ai:   GCP Vertex AI via ADC — use for container deployments
   mode: modular             # modular | global
   global_model: ""          # used only when mode=global, e.g. claude-sonnet-4-6
   agents:
diff --git a/core/config.py b/core/config.py
index 004c438..bb70801 100644
--- a/core/config.py
+++ b/core/config.py
@@ -1,9 +1,11 @@
 """Runtime configuration loader.
 
-Reads non-secret configuration from conf.yaml (project root).
+Reads non-secret configuration from conf.yaml.  The path defaults to
+conf.yaml in the working directory but can be overridden by setting
+ARIA_CONFIG_PATH (e.g. to /etc/aria/conf.yaml when mounted via ConfigMap).
 Falls back to environment variables when conf.yaml is absent (CI, Docker).
 Secrets (passwords, API keys, tokens) are never read here — they come from
-the process environment injected by Infisical or a local .env file.
+the process environment injected by Infisical or a vault implementation.
 """
 
 import os
@@ -13,13 +15,14 @@
 
 @lru_cache(maxsize=1)
 def _raw() -> dict:
-    """Load and cache the contents of conf.yaml.
+    """Load and cache the contents of the config file.
 
+    The path is resolved once from ARIA_CONFIG_PATH (default: conf.yaml).
     Returns an empty dict if the file does not exist or cannot be parsed,
     so callers can always fall back to environment variables without crashing.
-    The lru_cache ensures we only read the file once per process lifetime.
+    Call _raw.cache_clear() in tests when switching ARIA_CONFIG_PATH.
     """
-    path = Path("conf.yaml")
+    path = Path(os.environ.get("ARIA_CONFIG_PATH", "conf.yaml"))
     if not path.exists():
         return {}
     try:
@@ -109,6 +112,27 @@ def resolve_model(agent_num: str) -> str | None:
     return llm_agent_model(agent_num)
 
 
+def llm_provider() -> str:
+    """Return the LLM provider to use for all agents.
+
+    Values: 'anthropic' (default) | 'claude_code' | 'vertex_ai'.
+    - anthropic:   direct Anthropic API — requires ANTHROPIC_API_KEY.
+    - claude_code: local Claude Code CLI — safe for local dev only (#84).
+    - vertex_ai:   GCP Vertex AI via ADC — no API key needed in container.
+    Can be set via llm.provider in conf.yaml or ARIA_LLM_PROVIDER env var.
+    """
+    return _get(["llm", "provider"], "ARIA_LLM_PROVIDER", "anthropic")
+
+
+def vault_backend() -> str:
+    """Return the vault backend for secret retrieval.
+
+    Values: 'env' (default) | 'gcp' | 'hashicorp' | 'aws' | 'azure'.
+    Can be set via runtime.vault_backend in conf.yaml or ARIA_VAULT_BACKEND env var.
+    """
+    return _get(["runtime", "vault_backend"], "ARIA_VAULT_BACKEND", "env")
+
+
 # ── CDP ───────────────────────────────────────────────────────────────────────
 
 
diff --git a/deployment/README.md b/deployment/README.md
new file mode 100644
index 0000000..167879f
--- /dev/null
+++ b/deployment/README.md
@@ -0,0 +1,190 @@
+# ARIA — Deployment Guide
+
+ARIA ships as a single Docker image. This guide covers four deployment patterns.
+All patterns use the same image; what changes is how config and secrets are injected.
+
+---
+
+## Prerequisites
+
+- Docker ≥ 24
+- A `conf.yaml` (copy `conf_template.yaml` from the project root and fill in values)
+- Secrets in environment variables or a vault backend (see `.env.example`)
+
+---
+
+## Pattern 1 — Docker CLI (quickstart)
+
+```bash
+docker build -t aria:latest .
+
+docker run -d \
+  --name aria \
+  -p 8000:8000 \
+  -v /path/to/your/conf.yaml:/etc/aria/conf.yaml:ro \
+  -v aria_logs:/var/log/aria \
+  -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \
+  -e ARIA_LOG_DIR=/var/log/aria \
+  -e SNOW_PASSWORD=<your-password> \
+  -e ANTHROPIC_API_KEY=<your-key> \
+  -e SLACK_BOT_TOKEN=<your-token> \
+  aria:latest
+
+# Verify
+curl http://localhost:8000/api/v1/health
+```
+
+---
+
+## Pattern 2 — docker compose (monolithic)
+
+```bash
+cd deployment/monolithic
+
+# Copy and fill in the config
+cp conf.yaml.example conf.yaml
+# Edit conf.yaml with your ServiceNow, GCP, and Slack settings
+
+# Set secrets in environment (or a .env file in this directory)
+export SNOW_PASSWORD=...
+export ANTHROPIC_API_KEY=...
+export SLACK_BOT_TOKEN=...
+
+docker compose up -d
+
+# Tail logs
+docker compose logs -f aria
+```
+
+The compose file bind-mounts `./conf.yaml` to `/etc/aria/conf.yaml` inside the container and uses a named volume for log persistence.
+
+---
+
+## Pattern 3 — Cloud Run
+
+```bash
+# Build and push
+docker build -t gcr.io/<project>/aria:latest .
+docker push gcr.io/<project>/aria:latest
+
+# Deploy (secrets via Secret Manager — set runtime.vault_backend: gcp in conf.yaml)
+gcloud run deploy aria \
+  --image gcr.io/<project>/aria:latest \
+  --region europe-west1 \
+  --set-env-vars ARIA_CONFIG_PATH=/etc/aria/conf.yaml \
+  --set-env-vars ARIA_LLM_PROVIDER=vertex_ai \
+  --set-env-vars GCP_PROJECT_ID=<project> \
+  --set-secrets SNOW_PASSWORD=aria-snow-password:latest \
+  --set-secrets SLACK_BOT_TOKEN=aria-slack-bot-token:latest \
+  --memory 2Gi \
+  --cpu 2 \
+  --no-allow-unauthenticated
+
+# For conf.yaml: mount via a Cloud Storage FUSE volume or bake into the image at build time.
+# The simplest option for Cloud Run is to set ARIA_CONFIG_PATH and pass config as env vars
+# (every conf.yaml key has an env var fallback — see conf_template.yaml for the mapping).
+```
+
+---
+
+## Pattern 4 — GKE (ConfigMap pattern)
+
+Create a ConfigMap from your `conf.yaml`:
+
+```bash
+kubectl create configmap aria-config \
+  --from-file=conf.yaml=./conf.yaml \
+  --namespace aria
+```
+
+Deployment snippet (adjust image, replicas, and secret names to your cluster):
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: aria
+  namespace: aria
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: aria
+  template:
+    metadata:
+      labels:
+        app: aria
+    spec:
+      serviceAccountName: aria-sa   # must have roles/aiplatform.user + roles/secretmanager.secretAccessor
+      containers:
+        - name: aria
+          image: gcr.io/<project>/aria:latest
+          ports:
+            - containerPort: 8000
+          env:
+            - name: ARIA_CONFIG_PATH
+              value: /etc/aria/conf.yaml
+            - name: ARIA_LOG_DIR
+              value: /var/log/aria
+            - name: ARIA_LLM_PROVIDER
+              value: vertex_ai
+            - name: GCP_PROJECT_ID
+              value: <project>
+            - name: ARIA_VAULT_BACKEND
+              value: gcp
+          volumeMounts:
+            - name: config
+              mountPath: /etc/aria
+              readOnly: true
+            - name: logs
+              mountPath: /var/log/aria
+          livenessProbe:
+            httpGet:
+              path: /api/v1/health
+              port: 8000
+            initialDelaySeconds: 15
+            periodSeconds: 30
+      volumes:
+        - name: config
+          configMap:
+            name: aria-config
+        - name: logs
+          emptyDir: {}
+```
+
+Apply:
+
+```bash
+kubectl apply -f aria-deployment.yaml
+kubectl rollout status deployment/aria -n aria
+```
+
+---
+
+## LLM provider selection
+
+Set `llm.provider` in `conf.yaml` or override with `ARIA_LLM_PROVIDER`:
+
+| Provider | Value | Auth | Use case |
+|---|---|---|---|
+| Anthropic API | `anthropic` | `ANTHROPIC_API_KEY` | Default — any non-GCP deployment |
+| GCP Vertex AI | `vertex_ai` | ADC (no API key) | GKE, Cloud Run — recommended for GCP |
+| Claude Code CLI | `claude_code` | Local subscription | Local dev only — **not for production** (#84) |
+
+For Vertex AI, the model ID in `conf.yaml` selects the model family:
+- Claude-on-Vertex: `claude-sonnet@20250201`
+- Gemini: `gemini-2.0-flash`, `gemini-2.5-pro`
+
+---
+
+## Vault backend selection
+
+Set `runtime.vault_backend` in `conf.yaml` or override with `ARIA_VAULT_BACKEND`:
+
+| Backend | Value | Auth |
+|---|---|---|
+| Environment variables | `env` (default) | None — reads from process env |
+| GCP Secret Manager | `gcp` | ADC — needs `GCP_PROJECT_ID` env var |
+| HashiCorp Vault | `hashicorp` | `VAULT_TOKEN` env var |
+| AWS Secrets Manager | `aws` | `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` |
+| Azure Key Vault | `azure` | Azure SDK credential chain |
diff --git a/deployment/monolithic/conf.yaml.example b/deployment/monolithic/conf.yaml.example
new file mode 100644
index 0000000..bc09e0c
--- /dev/null
+++ b/deployment/monolithic/conf.yaml.example
@@ -0,0 +1,52 @@
+# ──────────────────────────────────────────────────────────────────────────────
+# ARIA — monolithic deployment config example
+# Copy to conf.yaml in this directory and fill in your values.
+# This file is bind-mounted at /etc/aria/conf.yaml inside the container.
+# ──────────────────────────────────────────────────────────────────────────────
+
+runtime:
+  operating_mode: inform    # only 'inform' is implemented in Phase 1.5
+  vault_backend: env        # 'env' reads secrets from environment variables
+  log_format: json          # json is best for container log aggregators
+  log_dir: /var/log/aria    # matches the volume mount in docker-compose.yml
+
+runs:
+  db_path: /var/log/aria/runs.db  # store run history alongside logs
+
+servicenow:
+  instance: <your-instance>.service-now.com
+  user: <your-service-account>
+  assignment_group: <your-assignment-group>
+  cmdb_rel_type: "Members::Member of"
+
+llm:
+  provider: anthropic        # anthropic | claude_code | vertex_ai
+  mode: global
+  global_model: claude-sonnet-4-6
+  agents:
+    agent1: ""
+    agent2: ""               # leave empty to use static routing in Agent 2
+    agent3: ""
+    agent4: ""
+
+connectors:
+  log: gcp                  # gcp | memory (use memory only for dry-run/testing)
+  queue: memory
+  state_store: memory
+
+gcp:
+  project_id: <your-gcp-project-id>
+  region: europe-west1
+  gcs_bucket_logs: <your-gcs-bucket>
+  bq_log_dataset: <your-bq-dataset>
+
+cdp:
+  ssh_user: hadoop
+  log_dirs:
+    - /var/log/hadoop-hdfs
+    - /var/log/hadoop-yarn
+    - /var/log/hive
+    - /var/log/spark
+
+slack:
+  channel_id: <your-slack-channel-id>
diff --git a/deployment/monolithic/docker-compose.yml b/deployment/monolithic/docker-compose.yml
new file mode 100644
index 0000000..383f0c3
--- /dev/null
+++ b/deployment/monolithic/docker-compose.yml
@@ -0,0 +1,40 @@
+services:
+  aria:
+    build:
+      context: ../..        # project root
+      dockerfile: Dockerfile
+    ports:
+      - "8000:8000"
+    environment:
+      # Config file path — the conf.yaml below is bind-mounted here.
+      ARIA_CONFIG_PATH: /etc/aria/conf.yaml
+
+      # Log directory — written to the named volume below.
+      ARIA_LOG_DIR: /var/log/aria
+
+      # Enable the built-in ops dashboard.
+      ARIA_DASHBOARD_ENABLED: "true"
+
+      # Secrets — passed as env vars (EnvVarVault default).
+      # Replace these with real values or use a .env file (not committed).
+      SNOW_PASSWORD: "${SNOW_PASSWORD}"
+      ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY}"
+      SLACK_BOT_TOKEN: "${SLACK_BOT_TOKEN}"
+
+    volumes:
+      # Operator-provided config file (copy from conf.yaml.example and fill in).
+      - ./conf.yaml:/etc/aria/conf.yaml:ro
+
+      # Persistent log storage.
+      - aria_logs:/var/log/aria
+
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
+      interval: 30s
+      timeout: 10s
+      start_period: 15s
+      retries: 3
+
+volumes:
+  aria_logs:
diff --git a/documentation/guides/installation.md b/documentation/guides/installation.md
new file mode 100644
index 0000000..1abfd18
--- /dev/null
+++ b/documentation/guides/installation.md
@@ -0,0 +1,281 @@
+# Installation Guide
+
+ARIA ships as a single Docker image. There is no Python installation required on the target machine — only Docker (local) or a Kubernetes cluster (production).
+
+Two deployment paths are covered here:
+
+- **Docker** — run ARIA on a local machine or a single VM
+- **Kubernetes** — run ARIA on a GKE cluster (or any Kubernetes distribution)
+
+Both paths share the same image and the same `conf.yaml` configuration file. What changes is how that config and the required secrets are injected at runtime.
+
+---
+
+## Prerequisites
+
+### Both paths
+
+- A `conf.yaml` — copy `conf_template.yaml` from the project root and fill in your values
+- Credentials for the services ARIA connects to (ServiceNow, Slack, and an LLM provider) — see `.env.example` for the full variable list
+
+### Docker path
+
+- Docker ≥ 24 installed on the target machine
+
+### Kubernetes path
+
+- A running Kubernetes cluster (GKE, EKS, AKS, or self-managed)
+- `kubectl` configured to point at the target cluster
+- A container registry the cluster can pull from (GCR, Artifact Registry, ECR, GHCR)
+
+---
+
+## Prepare your conf.yaml
+
+Start from the project template:
+
+```bash
+cp conf_template.yaml conf.yaml
+```
+
+Minimum fields to fill in before ARIA will start:
+
+```yaml
+servicenow:
+  instance: dev382816           # your ServiceNow instance subdomain
+  user: admin
+
+slack:
+  channel_id: C0123456789       # target notification channel
+
+llm:
+  provider: anthropic           # anthropic | vertex_ai | claude_code (local dev only)
+  model: claude-sonnet-4-6
+
+runtime:
+  vault_backend: env            # env | gcp (see Vault backend section below)
+  log_dir: /var/log/aria
+```
+
+`conf.yaml` is **never baked into the image** — it is always injected at runtime as a volume mount or ConfigMap. This means the same image works across environments without a rebuild.
+
+---
+
+## Path 1 — Docker (local machine or VM)
+
+### 1. Build the image
+
+From the project root:
+
+```bash
+docker build -t aria:latest .
+```
+
+### 2. Run the container
+
+```bash
+docker run -d \
+  --name aria \
+  -p 8000:8000 \
+  -v /absolute/path/to/your/conf.yaml:/etc/aria/conf.yaml:ro \
+  -v aria_logs:/var/log/aria \
+  -e ARIA_CONFIG_PATH=/etc/aria/conf.yaml \
+  -e ARIA_LOG_DIR=/var/log/aria \
+  -e SNOW_PASSWORD=<your-password> \
+  -e ANTHROPIC_API_KEY=<your-key> \
+  -e SLACK_BOT_TOKEN=<your-token> \
+  aria:latest
+```
+
+The `conf.yaml` is mounted read-only. Secrets are passed as environment variables (not stored in the config file).
+
+### 3. Verify
+
+```bash
+curl http://localhost:8000/api/v1/health
+# Expected: {"status": "ok"}
+```
+
+### Optional: Docker Compose
+
+If you prefer Compose, a ready-to-use file is available under `deployment/monolithic/`:
+
+```bash
+cd deployment/monolithic
+cp conf.yaml.example conf.yaml   # fill in your values
+
+export SNOW_PASSWORD=...
+export ANTHROPIC_API_KEY=...
+export SLACK_BOT_TOKEN=...
+
+docker compose up -d
+docker compose logs -f aria
+```
+
+---
+
+## Path 2 — Kubernetes
+
+### 1. Build and push the image
+
+```bash
+# Replace <registry> and <project> with your values
+docker build -t <registry>/<project>/aria:latest .
+docker push <registry>/<project>/aria:latest
+```
+
+For GKE with Artifact Registry:
+
+```bash
+docker build -t europe-west1-docker.pkg.dev/<project>/aria/aria:latest .
+docker push europe-west1-docker.pkg.dev/<project>/aria/aria:latest
+```
+
+### 2. Create the namespace and ConfigMap
+
+```bash
+kubectl create namespace aria
+
+kubectl create configmap aria-config \
+  --from-file=conf.yaml=./conf.yaml \
+  --namespace aria
+```
+
+### 3. Create the secrets
+
+```bash
+kubectl create secret generic aria-secrets \
+  --from-literal=SNOW_PASSWORD=<your-password> \
+  --from-literal=ANTHROPIC_API_KEY=<your-key> \
+  --from-literal=SLACK_BOT_TOKEN=<your-token> \
+  --namespace aria
+```
+
+For GCP deployments using Vertex AI and Secret Manager, grant the pod's service account the necessary IAM roles instead of mounting API keys — see the [LLM provider section](#llm-provider-selection) below.
+
+### 4. Deploy
+
+Save the following as `aria-deployment.yaml` and adjust the image path, project ID, and replica count:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: aria
+  namespace: aria
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: aria
+  template:
+    metadata:
+      labels:
+        app: aria
+    spec:
+      # For GCP: bind a Kubernetes service account to a GCP service account via Workload Identity
+      # serviceAccountName: aria-sa
+      containers:
+        - name: aria
+          image: <registry>/<project>/aria:latest
+          ports:
+            - containerPort: 8000
+          env:
+            - name: ARIA_CONFIG_PATH
+              value: /etc/aria/conf.yaml
+            - name: ARIA_LOG_DIR
+              value: /var/log/aria
+          envFrom:
+            - secretRef:
+                name: aria-secrets
+          volumeMounts:
+            - name: config
+              mountPath: /etc/aria
+              readOnly: true
+            - name: logs
+              mountPath: /var/log/aria
+          livenessProbe:
+            httpGet:
+              path: /api/v1/health
+              port: 8000
+            initialDelaySeconds: 15
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/v1/health
+              port: 8000
+            initialDelaySeconds: 5
+            periodSeconds: 10
+      volumes:
+        - name: config
+          configMap:
+            name: aria-config
+        - name: logs
+          emptyDir: {}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: aria
+  namespace: aria
+spec:
+  selector:
+    app: aria
+  ports:
+    - port: 80
+      targetPort: 8000
+  type: ClusterIP
+```
+
+Apply and verify:
+
+```bash
+kubectl apply -f aria-deployment.yaml
+kubectl rollout status deployment/aria -n aria
+kubectl get pods -n aria
+```
+
+### 5. Access the API
+
+From within the cluster, ARIA is reachable at `http://aria.aria.svc.cluster.local/api/v1/health`.
+
+To expose it externally, add an Ingress resource pointing to the `aria` Service on port 80.
+
+---
+
+## LLM provider selection
+
+Set `llm.provider` in `conf.yaml` or override with the `ARIA_LLM_PROVIDER` environment variable:
+
+| Provider | `llm.provider` value | Auth required | Recommended for |
+|---|---|---|---|
+| Anthropic API | `anthropic` (default) | `ANTHROPIC_API_KEY` env var | Any non-GCP deployment |
+| GCP Vertex AI | `vertex_ai` | ADC via Workload Identity — no API key in pod | GKE, Cloud Run |
+| Claude Code CLI | `claude_code` | Local Claude subscription | Local dev only — **not for production** |
+
+For Vertex AI, the model name in `conf.yaml` selects the model family:
+
+```yaml
+llm:
+  provider: vertex_ai
+  model: claude-sonnet@20250201   # Claude-on-Vertex
+  # model: gemini-2.0-flash       # Gemini
+```
+
+The pod's service account must have `roles/aiplatform.user` on the GCP project. With Workload Identity, no API key or credential file is needed in the container.
+
+---
+
+## Vault / secrets backend
+
+ARIA can read secrets from different backends depending on the environment. Set `runtime.vault_backend` in `conf.yaml` or override with `ARIA_VAULT_BACKEND`:
+
+| Backend | Value | How secrets are supplied |
+|---|---|---|
+| Environment variables | `env` (default) | Pass secrets as `-e KEY=value` in `docker run` or as a Kubernetes Secret |
+| GCP Secret Manager | `gcp` | ADC — needs `GCP_PROJECT_ID` env var; pod service account needs `roles/secretmanager.secretAccessor` |
+| HashiCorp Vault | `hashicorp` | `VAULT_TOKEN` and `VAULT_ADDR` env vars |
+| AWS Secrets Manager | `aws` | `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` env vars (or instance profile) |
+| Azure Key Vault | `azure` | Azure SDK credential chain |
+
+For local and staging deployments, `env` (the default) is the simplest option. For production GCP deployments, `gcp` removes the need to manage credentials manually — the pod authenticates via Workload Identity.
diff --git a/documentation/index.md b/documentation/index.md
index 56b28fe..25b92f8 100644
--- a/documentation/index.md
+++ b/documentation/index.md
@@ -28,4 +28,5 @@ ARIA automates all four steps. Phase 1 is **notify-only**: ARIA presents its fin
 - [Agent descriptions](architecture/agents.md)
 - [Interface & plugin system](architecture/interfaces.md)
 - [Core data models](architecture/data-models.md)
-- [Getting started](guides/getting-started.md)
+- [Getting started (local dev)](guides/getting-started.md)
+- [Installation guide (Docker + Kubernetes)](guides/installation.md)
diff --git a/implementations/llm/vertex_ai/__init__.py b/implementations/llm/vertex_ai/__init__.py
new file mode 100644
index 0000000..b2b105b
--- /dev/null
+++ b/implementations/llm/vertex_ai/__init__.py
@@ -0,0 +1,3 @@
+from implementations.llm.vertex_ai.llm_client import VertexAILLMClient
+
+__all__ = ["VertexAILLMClient"]
diff --git a/implementations/llm/vertex_ai/llm_client.py b/implementations/llm/vertex_ai/llm_client.py
new file mode 100644
index 0000000..501b1a6
--- /dev/null
+++ b/implementations/llm/vertex_ai/llm_client.py
@@ -0,0 +1,209 @@
+"""Vertex AI implementation of LLMClientInterface.
+
+Supports two model families, detected by model name prefix:
+  - Claude-on-Vertex  (model starts with "claude"): uses anthropic.AnthropicVertex,
+    which authenticates via Application Default Credentials (ADC) against Vertex AI.
+  - Gemini             (all other model names):     uses google-cloud-aiplatform SDK,
+    also via ADC.
+
+Auth: ADC only — no API key required in the container.  Grant the service account
+  roles/aiplatform.user on the GCP project and ADC will resolve credentials
+  automatically on GKE, Cloud Run, and Compute Engine.
+
+Example:
+    client = VertexAILLMClient(
+        model="claude-sonnet@20250201",
+        project_id="my-project",
+        location="europe-west1",
+    )
+"""
+
+import time
+
+import anthropic
+
+from core.exceptions import LLMAuthError, LLMResponseError, LLMUnavailableError
+from core.interfaces.llm_client import LLMClientInterface
+from core.observability import EVENT_LLM_CALL_COMPLETED, get_logger, record_llm_tokens
+
+logger = get_logger(__name__)
+
+
+class VertexAILLMClient(LLMClientInterface):
+    """LLMClientInterface backed by GCP Vertex AI (ADC auth, no API key)."""
+
+    def __init__(self, model: str, project_id: str, location: str = "europe-west1") -> None:
+        """Initialise the Vertex AI client.
+
+        Args:
+            model:      Model identifier.  Claude models: 'claude-sonnet@20250201'.
+                        Gemini models: 'gemini-2.0-flash', 'gemini-2.5-pro'.
+            project_id: GCP project ID that hosts the Vertex AI endpoint.
+            location:   GCP region for the endpoint (default: europe-west1).
+                        Reads from gcp.region in conf.yaml or GCP_REGION env var.
+
+        Raises:
+            LLMAuthError: If ADC credentials cannot be resolved at construction time.
+            ImportError:  If google-cloud-aiplatform or anthropic[vertex] are not installed.
+        """
+        self._model = model
+        self._project_id = project_id
+        self._location = location
+        self._is_claude = model.startswith("claude")
+
+    def complete(
+        self,
+        messages: list[dict[str, str]],
+        max_tokens: int = 1024,
+        temperature: float = 0.0,
+        system: str | None = None,
+    ) -> str:
+        """Send messages to Vertex AI and return the response text.
+
+        Routes to AnthropicVertex for Claude models, or to the Gemini SDK
+        for all other model names.
+
+        Raises:
+            LLMAuthError:       ADC credentials missing or permission denied.
+            LLMUnavailableError: Network error or 5xx from Vertex AI.
+            LLMResponseError:   Empty or unparseable response.
+        """
+        if self._is_claude:
+            return self._complete_claude(messages, max_tokens, temperature, system)
+        return self._complete_gemini(messages, max_tokens, temperature, system)
+
+    # ── Claude-on-Vertex ──────────────────────────────────────────────────────
+
+    def _complete_claude(
+        self,
+        messages: list[dict[str, str]],
+        max_tokens: int,
+        temperature: float,
+        system: str | None,
+    ) -> str:
+        start = time.monotonic()
+        try:
+            client = anthropic.AnthropicVertex(
+                project_id=self._project_id,
+                region=self._location,
+            )
+            kwargs: dict = {
+                "model": self._model,
+                "max_tokens": max_tokens,
+                "messages": messages,
+            }
+            if system:
+                kwargs["system"] = system
+
+            response = client.messages.create(**kwargs)
+
+        except anthropic.AuthenticationError as exc:
+            raise LLMAuthError(f"Vertex AI ADC credentials rejected: {exc}") from exc
+        except anthropic.APIConnectionError as exc:
+            raise LLMUnavailableError(f"Vertex AI connection error: {exc}") from exc
+        except anthropic.APIStatusError as exc:
+            raise LLMUnavailableError(f"Vertex AI API error {exc.status_code}: {exc}") from exc
+
+        usage = getattr(response, "usage", None)
+        tokens_in = getattr(usage, "input_tokens", None)
+        tokens_out = getattr(usage, "output_tokens", None)
+        record_llm_tokens(tokens_in, tokens_out)
+        logger.info(
+            EVENT_LLM_CALL_COMPLETED,
+            model=self._model,
+            provider="vertex_ai_claude",
+            tokens_in=tokens_in,
+            tokens_out=tokens_out,
+            duration_ms=int((time.monotonic() - start) * 1000),
+        )
+
+        if not response.content:
+            raise LLMResponseError("Vertex AI (Claude) returned an empty response")
+
+        return response.content[0].text
+
+    # ── Gemini ────────────────────────────────────────────────────────────────
+
+    def _complete_gemini(
+        self,
+        messages: list[dict[str, str]],
+        max_tokens: int,
+        temperature: float,
+        system: str | None,
+    ) -> str:
+        try:
+            import vertexai
+            from vertexai.generative_models import Content, GenerationConfig, GenerativeModel, Part
+        except ImportError as exc:
+            raise ImportError(
+                "google-cloud-aiplatform package is required for Gemini models. "
+                "Install with: pip install google-cloud-aiplatform"
+            ) from exc
+
+        try:
+            import google.auth.exceptions as google_auth_exc
+        except ImportError:
+            google_auth_exc = None  # type: ignore[assignment]
+
+        start = time.monotonic()
+        try:
+            vertexai.init(project=self._project_id, location=self._location)
+
+            # Convert messages to Vertex AI Content objects.
+            # Gemini role names are "user" and "model" (not "assistant").
+            contents = []
+            for msg in messages:
+                role = "model" if msg["role"] == "assistant" else "user"
+                contents.append(Content(role=role, parts=[Part.from_text(msg["content"])]))
+
+            # Prepend system instruction as a leading user turn when provided,
+            # since Gemini handles system prompts as a constructor argument.
+            model = GenerativeModel(
+                model_name=self._model,
+                system_instruction=system if system else None,
+            )
+
+            gen_config = GenerationConfig(
+                max_output_tokens=max_tokens,
+                temperature=temperature,
+            )
+            response = model.generate_content(contents, generation_config=gen_config)
+
+        except Exception as exc:
+            # Map Google auth errors to LLMAuthError.
+            exc_name = type(exc).__name__
+            if google_auth_exc and isinstance(exc, google_auth_exc.DefaultCredentialsError):
+                raise LLMAuthError(f"GCP ADC credentials not found: {exc}") from exc
+            # PermissionDenied / 403 → auth.
+            if "PermissionDenied" in exc_name or "403" in str(exc):
+                raise LLMAuthError(f"Vertex AI permission denied: {exc}") from exc
+            # ServiceUnavailable / network errors → unavailable.
+            if "ServiceUnavailable" in exc_name or "Unavailable" in exc_name:
+                raise LLMUnavailableError(f"Vertex AI unavailable: {exc}") from exc
+            raise LLMUnavailableError(f"Vertex AI error: {exc}") from exc
+
+        # Gemini does not expose token counts in the same way; best-effort.
+        usage_meta = getattr(response, "usage_metadata", None)
+        tokens_in = getattr(usage_meta, "prompt_token_count", None)
+        tokens_out = getattr(usage_meta, "candidates_token_count", None)
+        record_llm_tokens(tokens_in, tokens_out)
+        logger.info(
+            EVENT_LLM_CALL_COMPLETED,
+            model=self._model,
+            provider="vertex_ai_gemini",
+            tokens_in=tokens_in,
+            tokens_out=tokens_out,
+            duration_ms=int((time.monotonic() - start) * 1000),
+        )
+
+        try:
+            text = response.text
+        except Exception as exc:
+            raise LLMResponseError(
+                f"Vertex AI (Gemini) returned an empty or blocked response: {exc}"
+            ) from exc
+
+        if not text:
+            raise LLMResponseError("Vertex AI (Gemini) returned an empty response")
+
+        return text
diff --git a/implementations/storage/sqlite_run_store.py b/implementations/storage/sqlite_run_store.py
index f381169..647bf49 100644
--- a/implementations/storage/sqlite_run_store.py
+++ b/implementations/storage/sqlite_run_store.py
@@ -43,36 +43,45 @@
 )
 """
 
+# Static query templates for list() and count(). Each optional filter uses the
+# "? IS NULL OR column op ?" pattern so the SQL string is a constant — user
+# input flows only into the parameter tuple, never into the query string itself.
+# Each filter value must appear twice: once for the IS NULL check, once for the
+# comparison. NULL IS NULL evaluates to TRUE in SQLite, so an unset filter is
+# a no-op without any dynamic WHERE clause construction.
+_LIST_SQL = (
+    "SELECT " + _COLUMNS + " FROM runs"
+    " WHERE (? IS NULL OR start_time >= ?)"
+    "   AND (? IS NULL OR start_time <= ?)"
+    "   AND (? IS NULL OR status = ?)"
+    "   AND (? IS NULL OR error_class = ?)"
+    " ORDER BY start_time DESC LIMIT ? OFFSET ?"
+)
+
+_COUNT_SQL = (
+    "SELECT COUNT(*) FROM runs"
+    " WHERE (? IS NULL OR start_time >= ?)"
+    "   AND (? IS NULL OR start_time <= ?)"
+    "   AND (? IS NULL OR status = ?)"
+    "   AND (? IS NULL OR error_class = ?)"
+)
+
 
-def _build_filters(
+def _filter_params(
     from_dt: datetime | None,
     to_dt: datetime | None,
     status: str | None,
     error_class: str | None,
-) -> tuple[str, list[str]]:
-    """Build the shared WHERE clause for list() and count().
+) -> tuple:
+    """Build the parameter tuple for _LIST_SQL / _COUNT_SQL.
 
-    Module-level (not a method) because the ``list`` method name on the store
-    class shadows the builtin in annotations. ISO-8601 strings compare
-    lexicographically in the same order as the datetimes they encode, so the
-    range filters are plain string compares.
+    ISO-8601 strings compare lexicographically in the same order as the
+    datetimes they represent, so range filters are plain string comparisons.
+    Each value appears twice to match the (? IS NULL OR column = ?) pattern.
     """
-    clauses: list[str] = []
-    params: list[str] = []
-    if from_dt is not None:
-        clauses.append("start_time >= ?")
-        params.append(from_dt.isoformat())
-    if to_dt is not None:
-        clauses.append("start_time <= ?")
-        params.append(to_dt.isoformat())
-    if status is not None:
-        clauses.append("status = ?")
-        params.append(status)
-    if error_class is not None:
-        clauses.append("error_class = ?")
-        params.append(error_class)
-    where = f" WHERE {' AND '.join(clauses)}" if clauses else ""
-    return where, params
+    from_s = from_dt.isoformat() if from_dt is not None else None
+    to_s = to_dt.isoformat() if to_dt is not None else None
+    return (from_s, from_s, to_s, to_s, status, status, error_class, error_class)
 
 
 class SQLiteRunStore(RunStoreInterface):
@@ -125,10 +134,9 @@ def list(
         offset: int = 0,
     ) -> list[RunRecord]:
         """Query records newest-first with optional time/status/error filters."""
-        where, params = _build_filters(from_dt, to_dt, status, error_class)
-        sql = f"SELECT {_COLUMNS} FROM runs{where} " "ORDER BY start_time DESC LIMIT ? OFFSET ?"
+        params = _filter_params(from_dt, to_dt, status, error_class)
         with self._connect() as conn:
-            rows = conn.execute(sql, (*params, limit, offset)).fetchall()
+            rows = conn.execute(_LIST_SQL, (*params, limit, offset)).fetchall()
         return [self._row_to_record(r) for r in rows]
 
     def count(
@@ -139,9 +147,9 @@ def count(
         error_class: str | None = None,
     ) -> int:
         """Count records matching the same filters as list()."""
-        where, params = _build_filters(from_dt, to_dt, status, error_class)
+        params = _filter_params(from_dt, to_dt, status, error_class)
         with self._connect() as conn:
-            (n,) = conn.execute(f"SELECT COUNT(*) FROM runs{where}", params).fetchone()
+            (n,) = conn.execute(_COUNT_SQL, params).fetchone()
         return int(n)
 
     @staticmethod
diff --git a/implementations/vault/gcp_secret_manager.py b/implementations/vault/gcp_secret_manager.py
new file mode 100644
index 0000000..800b487
--- /dev/null
+++ b/implementations/vault/gcp_secret_manager.py
@@ -0,0 +1,91 @@
+"""GCP Secret Manager backed VaultInterface.
+
+Authenticates via Application Default Credentials (ADC) — no API key or service
+account JSON required in the container. On GKE and Cloud Run, ADC resolves
+automatically from the Workload Identity or the instance metadata server.
+
+The service account must have roles/secretmanager.secretAccessor on the project.
+
+Example:
+    vault = GCPSecretManagerVault.from_env()
+    ssh_key = vault.get_secret("aria-cdp-ssh-key")
+"""
+
+import os
+
+from core.exceptions import VaultSecretNotFoundError, VaultUnavailableError
+from core.interfaces.vault import VaultInterface
+
+
+class GCPSecretManagerVault(VaultInterface):
+    """VaultInterface backed by GCP Secret Manager, authenticated via ADC."""
+
+    def __init__(self, project_id: str) -> None:
+        """Initialise the vault for the given GCP project.
+
+        Args:
+            project_id: GCP project ID that hosts the secrets.
+
+        Raises:
+            ImportError: If google-cloud-secret-manager is not installed.
+        """
+        try:
+            from google.cloud import secretmanager
+        except ImportError as exc:
+            raise ImportError(
+                "google-cloud-secret-manager is required for GCPSecretManagerVault. "
+                "Install with: pip install google-cloud-secret-manager"
+            ) from exc
+
+        self._project_id = project_id
+        self._client = secretmanager.SecretManagerServiceClient()
+
+    @classmethod
+    def from_env(cls) -> "GCPSecretManagerVault":
+        """Construct from the GCP_PROJECT_ID environment variable.
+
+        Raises:
+            ValueError: If GCP_PROJECT_ID is not set.
+        """
+        project_id = os.environ.get("GCP_PROJECT_ID")
+        if not project_id:
+            raise ValueError("GCP_PROJECT_ID environment variable is not set")
+        return cls(project_id=project_id)
+
+    def get_secret(self, key: str) -> str:
+        """Retrieve the latest version of a secret from GCP Secret Manager.
+
+        Args:
+            key: Secret name as configured in Secret Manager (e.g. 'aria-cdp-ssh-key').
+                 Underscores are normalised to hyphens to match Secret Manager naming
+                 conventions (secret IDs cannot contain underscores by GCP policy).
+
+        Returns:
+            Secret payload as a UTF-8 string.
+
+        Raises:
+            VaultSecretNotFoundError: If the secret does not exist or has no active version.
+            VaultUnavailableError:    If Secret Manager cannot be reached or ADC fails.
+        """
+        # GCP Secret Manager does not allow underscores in secret IDs.
+        secret_id = key.replace("_", "-")
+        name = f"projects/{self._project_id}/secrets/{secret_id}/versions/latest"
+
+        try:
+            response = self._client.access_secret_version(request={"name": name})
+            return response.payload.data.decode("utf-8")
+
+        except Exception as exc:
+            exc_type = type(exc).__name__
+            # NotFound → secret does not exist.
+            if "NotFound" in exc_type or "404" in str(exc):
+                raise VaultSecretNotFoundError(
+                    f"Secret '{key}' (id: '{secret_id}') not found in project '{self._project_id}'"
+                ) from exc
+            # PermissionDenied / auth failures.
+            if "PermissionDenied" in exc_type or "DefaultCredentials" in exc_type:
+                raise VaultUnavailableError(
+                    f"GCP Secret Manager permission denied or ADC not configured: {exc}"
+                ) from exc
+            # Everything else — network, quota, etc.
+            raise VaultUnavailableError(f"GCP Secret Manager unavailable: {exc}") from exc
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
new file mode 100644
index 0000000..d1588e9
--- /dev/null
+++ b/tests/unit/test_config.py
@@ -0,0 +1,110 @@
+"""Unit tests for core/config.py — ARIA_CONFIG_PATH and new S3 helpers."""
+
+import textwrap
+
+import pytest
+
+import core.config as cfg
+
+
+@pytest.fixture(autouse=True)
+def clear_config_cache():
+    """Clear the _raw() LRU cache before and after each test."""
+    cfg._raw.cache_clear()
+    yield
+    cfg._raw.cache_clear()
+
+
+class TestARIAConfigPath:
+    def test_defaults_to_conf_yaml_when_env_not_set(self, tmp_path, monkeypatch):
+        """When ARIA_CONFIG_PATH is absent, conf.yaml in CWD is used."""
+        monkeypatch.delenv("ARIA_CONFIG_PATH", raising=False)
+        conf = tmp_path / "conf.yaml"
+        conf.write_text("slack:\n  channel_id: C_FROM_FILE\n")
+        monkeypatch.chdir(tmp_path)
+        assert cfg.slack_channel_id() == "C_FROM_FILE"
+
+    def test_reads_from_aria_config_path(self, tmp_path, monkeypatch):
+        """ARIA_CONFIG_PATH overrides the default conf.yaml location."""
+        conf = tmp_path / "custom.yaml"
+        conf.write_text("slack:\n  channel_id: C_CUSTOM\n")
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf))
+        assert cfg.slack_channel_id() == "C_CUSTOM"
+
+    def test_missing_file_does_not_crash(self, tmp_path, monkeypatch):
+        """A missing ARIA_CONFIG_PATH file returns empty dict — callers fall back to env vars."""
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(tmp_path / "nonexistent.yaml"))
+        monkeypatch.setenv("SLACK_CHANNEL_ID", "C_FROM_ENV")
+        assert cfg.slack_channel_id() == "C_FROM_ENV"
+
+    def test_malformed_yaml_falls_back_to_env(self, tmp_path, monkeypatch):
+        """A malformed YAML file logs a warning and falls back to env vars without crashing."""
+        conf = tmp_path / "bad.yaml"
+        conf.write_text("{{ not: valid: yaml: [[[")
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf))
+        monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_FALLBACK")
+        assert cfg.slack_channel_id() == "C_ENV_FALLBACK"
+
+    def test_env_var_overrides_file_value(self, tmp_path, monkeypatch):
+        """Environment variable takes precedence over conf.yaml value."""
+        conf = tmp_path / "conf.yaml"
+        conf.write_text("slack:\n  channel_id: C_FILE\n")
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf))
+        monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_WINS")
+        # env var wins because _get() checks env AFTER yaml value — but only if yaml is empty
+        # Here yaml has a value, so yaml wins. Test the case where yaml is absent:
+        conf2 = tmp_path / "empty.yaml"
+        conf2.write_text("{}\n")
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf2))
+        monkeypatch.setenv("SLACK_CHANNEL_ID", "C_ENV_WINS")
+        assert cfg.slack_channel_id() == "C_ENV_WINS"
+
+
+class TestLLMProvider:
+    def test_defaults_to_anthropic(self, monkeypatch):
+        """llm_provider() returns 'anthropic' when nothing is configured."""
+        monkeypatch.delenv("ARIA_LLM_PROVIDER", raising=False)
+        assert cfg.llm_provider() == "anthropic"
+
+    def test_reads_from_env_var(self, monkeypatch):
+        """ARIA_LLM_PROVIDER env var is respected."""
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "claude_code")
+        assert cfg.llm_provider() == "claude_code"
+
+    def test_reads_from_conf_yaml(self, tmp_path, monkeypatch):
+        """llm.provider in conf.yaml is respected."""
+        conf = tmp_path / "conf.yaml"
+        conf.write_text(textwrap.dedent("""\
+            llm:
+              provider: vertex_ai
+        """))
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf))
+        monkeypatch.delenv("ARIA_LLM_PROVIDER", raising=False)
+        assert cfg.llm_provider() == "vertex_ai"
+
+    def test_vertex_ai_value(self, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "vertex_ai")
+        assert cfg.llm_provider() == "vertex_ai"
+
+
+class TestVaultBackend:
+    def test_defaults_to_env(self, monkeypatch):
+        """vault_backend() returns 'env' when nothing is configured."""
+        monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False)
+        assert cfg.vault_backend() == "env"
+
+    def test_reads_from_env_var(self, monkeypatch):
+        """ARIA_VAULT_BACKEND env var is respected."""
+        monkeypatch.setenv("ARIA_VAULT_BACKEND", "gcp")
+        assert cfg.vault_backend() == "gcp"
+
+    def test_reads_from_conf_yaml(self, tmp_path, monkeypatch):
+        """runtime.vault_backend in conf.yaml is respected."""
+        conf = tmp_path / "conf.yaml"
+        conf.write_text(textwrap.dedent("""\
+            runtime:
+              vault_backend: hashicorp
+        """))
+        monkeypatch.setenv("ARIA_CONFIG_PATH", str(conf))
+        monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False)
+        assert cfg.vault_backend() == "hashicorp"
diff --git a/tests/unit/test_dependencies.py b/tests/unit/test_dependencies.py
new file mode 100644
index 0000000..a5b6524
--- /dev/null
+++ b/tests/unit/test_dependencies.py
@@ -0,0 +1,127 @@
+"""Unit tests for the DI factory functions in api/dependencies.py."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+class TestGetLLMClient:
+    """_get_llm_client() routes to the correct implementation based on llm.provider."""
+
+    def _call(self, provider: str, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", provider)
+        # Clear the config cache so it picks up the new env var.
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+        from api.dependencies import _get_llm_client
+
+        return _get_llm_client
+
+    def test_anthropic_provider_returns_anthropic_client(self, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "anthropic")
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+
+        from implementations.llm.anthropic.llm_client import AnthropicLLMClient
+
+        with patch.object(AnthropicLLMClient, "__init__", return_value=None):
+            from api.dependencies import _get_llm_client
+
+            result = _get_llm_client("claude-sonnet-4-6")
+        assert isinstance(result, AnthropicLLMClient)
+
+    def test_claude_code_provider_returns_claude_code_client(self, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "claude_code")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+
+        from implementations.llm.claude_code.llm_client import ClaudeCodeLLMClient
+
+        with patch.object(ClaudeCodeLLMClient, "__init__", return_value=None):
+            from api.dependencies import _get_llm_client
+
+            result = _get_llm_client("claude-sonnet-4-6")
+        assert isinstance(result, ClaudeCodeLLMClient)
+
+    def test_vertex_ai_provider_returns_vertex_client(self, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "vertex_ai")
+        monkeypatch.setenv("GCP_PROJECT_ID", "my-project")
+        monkeypatch.setenv("GCP_REGION", "europe-west1")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+
+        from implementations.llm.vertex_ai.llm_client import VertexAILLMClient
+
+        with patch.object(VertexAILLMClient, "__init__", return_value=None):
+            from api.dependencies import _get_llm_client
+
+            result = _get_llm_client("claude-sonnet@20250201")
+        assert isinstance(result, VertexAILLMClient)
+
+    def test_unknown_provider_raises_value_error(self, monkeypatch):
+        monkeypatch.setenv("ARIA_LLM_PROVIDER", "unsupported_provider")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+        from api.dependencies import _get_llm_client
+
+        with pytest.raises(ValueError, match="Unknown llm.provider"):
+            _get_llm_client("some-model")
+
+
+class TestGetVault:
+    """_get_vault() routes to the correct vault backend based on runtime.vault_backend."""
+
+    def test_env_backend_returns_envvar_vault(self, monkeypatch):
+        monkeypatch.setenv("ARIA_VAULT_BACKEND", "env")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+        from api.dependencies import _get_vault
+        from implementations.vault.envvar import EnvVarVault
+
+        result = _get_vault()
+        assert isinstance(result, EnvVarVault)
+
+    def test_default_returns_envvar_vault(self, monkeypatch):
+        monkeypatch.delenv("ARIA_VAULT_BACKEND", raising=False)
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+        from api.dependencies import _get_vault
+        from implementations.vault.envvar import EnvVarVault
+
+        result = _get_vault()
+        assert isinstance(result, EnvVarVault)
+
+    def test_gcp_backend_returns_gcp_vault(self, monkeypatch):
+        monkeypatch.setenv("ARIA_VAULT_BACKEND", "gcp")
+        monkeypatch.setenv("GCP_PROJECT_ID", "my-project")
+        import core.config as cfg
+
+        cfg._raw.cache_clear()
+
+        # GCPSecretManagerVault imports google-cloud-secret-manager at __init__ time;
+        # mock the module so the import succeeds without the package installed.
+        import sys
+        from types import ModuleType
+
+        mock_sm = MagicMock()
+        google_mod = sys.modules.get("google") or ModuleType("google")
+        google_cloud_mod = sys.modules.get("google.cloud") or ModuleType("google.cloud")
+        monkeypatch.setitem(sys.modules, "google", google_mod)
+        monkeypatch.setitem(sys.modules, "google.cloud", google_cloud_mod)
+        monkeypatch.setitem(sys.modules, "google.cloud.secretmanager", mock_sm)
+        monkeypatch.delitem(sys.modules, "implementations.vault.gcp_secret_manager", raising=False)
+
+        from api.dependencies import _get_vault
+        from implementations.vault.gcp_secret_manager import GCPSecretManagerVault
+
+        result = _get_vault()
+        assert isinstance(result, GCPSecretManagerVault)
+        assert result._project_id == "my-project"
diff --git a/tests/unit/test_gcp_secret_manager_vault.py b/tests/unit/test_gcp_secret_manager_vault.py
new file mode 100644
index 0000000..74bb60b
--- /dev/null
+++ b/tests/unit/test_gcp_secret_manager_vault.py
@@ -0,0 +1,151 @@
+"""Unit tests for GCPSecretManagerVault.
+
+google-cloud-secret-manager is NOT installed in dev (it's a container-only dep),
+so we mock the entire google.cloud.secretmanager module via sys.modules.
+"""
+
+import sys
+from types import ModuleType
+from unittest.mock import MagicMock
+
+import pytest
+
+from core.exceptions import VaultSecretNotFoundError, VaultUnavailableError
+
+# ── module-level mock for google.cloud.secretmanager ─────────────────────────
+
+
+def _make_secretmanager_mock() -> MagicMock:
+    """Build a minimal fake google.cloud.secretmanager module."""
+    mock_sm = MagicMock()
+    mock_sm.SecretManagerServiceClient = MagicMock
+    return mock_sm
+
+
+@pytest.fixture(autouse=True)
+def mock_gcp_secretmanager(monkeypatch):
+    """Inject a fake google.cloud.secretmanager into sys.modules for every test."""
+    mock_sm = _make_secretmanager_mock()
+    # Ensure parent namespace packages exist.
+    google_mod = sys.modules.get("google") or ModuleType("google")
+    google_cloud_mod = sys.modules.get("google.cloud") or ModuleType("google.cloud")
+    monkeypatch.setitem(sys.modules, "google", google_mod)
+    monkeypatch.setitem(sys.modules, "google.cloud", google_cloud_mod)
+    monkeypatch.setitem(sys.modules, "google.cloud.secretmanager", mock_sm)
+    # Also clear the vault module from sys.modules so the import runs fresh each test.
+    monkeypatch.delitem(sys.modules, "implementations.vault.gcp_secret_manager", raising=False)
+    yield mock_sm
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+
+def _make_vault(mock_sm: MagicMock, project_id: str = "my-project"):
+    """Import and construct a GCPSecretManagerVault, then swap in a mock client."""
+    from implementations.vault.gcp_secret_manager import GCPSecretManagerVault
+
+    vault = GCPSecretManagerVault(project_id=project_id)
+    mock_client_instance = MagicMock()
+    vault._client = mock_client_instance  # replace whatever __init__ created
+    return vault, mock_client_instance
+
+
+# ── construction ──────────────────────────────────────────────────────────────
+
+
+class TestConstruction:
+    def test_from_env_reads_gcp_project_id(self, mock_gcp_secretmanager, monkeypatch):
+        monkeypatch.setenv("GCP_PROJECT_ID", "test-project")
+        vault, _ = _make_vault(mock_gcp_secretmanager, project_id="test-project")
+
+        monkeypatch.setenv("GCP_PROJECT_ID", "from-env-project")
+        # Re-import so from_env picks up the fresh env var.
+        import importlib
+        import sys
+
+        from implementations.vault.gcp_secret_manager import GCPSecretManagerVault as _V
+
+        importlib.reload(sys.modules[_V.__module__])
+        from implementations.vault.gcp_secret_manager import GCPSecretManagerVault as _V2
+
+        monkeypatch.setenv("GCP_PROJECT_ID", "env-project")
+        vault2 = _V2.from_env()
+        assert vault2._project_id == "env-project"
+
+    def test_from_env_raises_if_no_project_id(self, mock_gcp_secretmanager, monkeypatch):
+        monkeypatch.delenv("GCP_PROJECT_ID", raising=False)
+        from implementations.vault.gcp_secret_manager import GCPSecretManagerVault
+
+        with pytest.raises(ValueError, match="GCP_PROJECT_ID"):
+            GCPSecretManagerVault.from_env()
+
+
+# ── get_secret ────────────────────────────────────────────────────────────────
+
+
+class TestGetSecret:
+    def test_returns_secret_value(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+        mock_response = MagicMock()
+        mock_response.payload.data = b"super-secret-value"
+        mock_client.access_secret_version.return_value = mock_response
+
+        result = vault.get_secret("CDP_SSH_KEY")
+        assert result == "super-secret-value"
+
+    def test_underscore_normalised_to_hyphen(self, mock_gcp_secretmanager):
+        """Underscores in key names are converted to hyphens for Secret Manager IDs."""
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+        mock_response = MagicMock()
+        mock_response.payload.data = b"value"
+        mock_client.access_secret_version.return_value = mock_response
+
+        vault.get_secret("CDP_SSH_KEY")
+        call_args = mock_client.access_secret_version.call_args
+        name = call_args[1]["request"]["name"]
+        assert "CDP-SSH-KEY" in name
+        assert "_" not in name.split("/secrets/")[1]
+
+    def test_correct_resource_path_format(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager, project_id="my-project")
+        mock_response = MagicMock()
+        mock_response.payload.data = b"value"
+        mock_client.access_secret_version.return_value = mock_response
+
+        vault.get_secret("my-secret")
+        call_args = mock_client.access_secret_version.call_args
+        name = call_args[1]["request"]["name"]
+        assert name == "projects/my-project/secrets/my-secret/versions/latest"
+
+    def test_not_found_raises_vault_secret_not_found(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+
+        class NotFound(Exception):
+            pass
+
+        mock_client.access_secret_version.side_effect = NotFound("404 not found")
+        with pytest.raises(VaultSecretNotFoundError):
+            vault.get_secret("missing-key")
+
+    def test_permission_denied_raises_vault_unavailable(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+
+        class PermissionDenied(Exception):
+            pass
+
+        mock_client.access_secret_version.side_effect = PermissionDenied("403 permission denied")
+        with pytest.raises(VaultUnavailableError):
+            vault.get_secret("any-key")
+
+    def test_network_error_raises_vault_unavailable(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+        mock_client.access_secret_version.side_effect = ConnectionError("network timeout")
+        with pytest.raises(VaultUnavailableError):
+            vault.get_secret("any-key")
+
+    def test_secret_decoded_as_utf8(self, mock_gcp_secretmanager):
+        vault, mock_client = _make_vault(mock_gcp_secretmanager)
+        mock_response = MagicMock()
+        mock_response.payload.data = "café".encode()
+        mock_client.access_secret_version.return_value = mock_response
+        assert vault.get_secret("key") == "café"
diff --git a/tests/unit/test_vertex_llm_client.py b/tests/unit/test_vertex_llm_client.py
new file mode 100644
index 0000000..79399bb
--- /dev/null
+++ b/tests/unit/test_vertex_llm_client.py
@@ -0,0 +1,200 @@
+"""Unit tests for VertexAILLMClient — both Claude-on-Vertex and Gemini paths."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from core.exceptions import LLMAuthError, LLMResponseError, LLMUnavailableError
+from implementations.llm.vertex_ai.llm_client import VertexAILLMClient
+
+MESSAGES = [{"role": "user", "content": "What is YARN?"}]
+PROJECT = "my-project"
+LOCATION = "europe-west1"
+
+
+# ── Routing ───────────────────────────────────────────────────────────────────
+
+
+class TestRouting:
+    def test_claude_model_routes_to_claude_path(self):
+        """Model names starting with 'claude' use AnthropicVertex."""
+        client = VertexAILLMClient("claude-sonnet@20250201", PROJECT, LOCATION)
+        assert client._is_claude is True
+
+    def test_gemini_model_routes_to_gemini_path(self):
+        """Model names not starting with 'claude' use the Gemini SDK."""
+        client = VertexAILLMClient("gemini-2.0-flash", PROJECT, LOCATION)
+        assert client._is_claude is False
+
+    def test_gemini_25_pro_routes_to_gemini_path(self):
+        client = VertexAILLMClient("gemini-2.5-pro", PROJECT, LOCATION)
+        assert client._is_claude is False
+
+
+# ── Claude-on-Vertex path ─────────────────────────────────────────────────────
+
+
+# Stub exception classes that mirror the real anthropic exception hierarchy.
+class _FakeAuthError(Exception):
+    pass
+
+
+class _FakeConnectionError(Exception):
+    pass
+
+
+class _FakeAPIStatusError(Exception):
+    status_code = 500
+
+
+class TestClaudeOnVertex:
+    def _make_client(self) -> VertexAILLMClient:
+        return VertexAILLMClient("claude-sonnet@20250201", PROJECT, LOCATION)
+
+    def _mock_response(self, text: str = "YARN manages cluster resources") -> MagicMock:
+        block = MagicMock()
+        block.text = text
+        response = MagicMock()
+        response.content = [block]
+        response.usage.input_tokens = 10
+        response.usage.output_tokens = 5
+        return response
+
+    def _patch_anthropic(self, mock_anthropic: MagicMock) -> None:
+        """Wire stub exception classes onto the mock anthropic module."""
+        mock_anthropic.AuthenticationError = _FakeAuthError
+        mock_anthropic.APIConnectionError = _FakeConnectionError
+        mock_anthropic.APIStatusError = _FakeAPIStatusError
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_complete_returns_text(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        mock_anthropic.AnthropicVertex.return_value.messages.create.return_value = (
+            self._mock_response()
+        )
+        client = self._make_client()
+        result = client.complete(MESSAGES)
+        assert result == "YARN manages cluster resources"
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_system_prompt_passed_through(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        mock_create = mock_anthropic.AnthropicVertex.return_value.messages.create
+        mock_create.return_value = self._mock_response()
+        client = self._make_client()
+        client.complete(MESSAGES, system="You are an expert.")
+        call_kwargs = mock_create.call_args[1]
+        assert call_kwargs["system"] == "You are an expert."
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_auth_error_raises_llm_auth_error(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        mock_anthropic.AnthropicVertex.return_value.messages.create.side_effect = _FakeAuthError(
+            "bad creds"
+        )
+        client = self._make_client()
+        with pytest.raises(LLMAuthError):
+            client.complete(MESSAGES)
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_connection_error_raises_llm_unavailable(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        mock_anthropic.AnthropicVertex.return_value.messages.create.side_effect = (
+            _FakeConnectionError("timeout")
+        )
+        client = self._make_client()
+        with pytest.raises(LLMUnavailableError):
+            client.complete(MESSAGES)
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_empty_response_raises_llm_response_error(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        response = MagicMock()
+        response.content = []
+        response.usage.input_tokens = 5
+        response.usage.output_tokens = 0
+        mock_anthropic.AnthropicVertex.return_value.messages.create.return_value = response
+        client = self._make_client()
+        with pytest.raises(LLMResponseError):
+            client.complete(MESSAGES)
+
+    @patch("implementations.llm.vertex_ai.llm_client.anthropic")
+    def test_max_tokens_forwarded(self, mock_anthropic):
+        self._patch_anthropic(mock_anthropic)
+        mock_create = mock_anthropic.AnthropicVertex.return_value.messages.create
+        mock_create.return_value = self._mock_response()
+        client = self._make_client()
+        client.complete(MESSAGES, max_tokens=512, temperature=0.5)
+        call_kwargs = mock_create.call_args[1]
+        assert call_kwargs["max_tokens"] == 512
+
+
+# ── Gemini path ───────────────────────────────────────────────────────────────
+
+
+class TestGemini:
+    def _make_client(self) -> VertexAILLMClient:
+        return VertexAILLMClient("gemini-2.0-flash", PROJECT, LOCATION)
+
+    def _mock_vertexai_modules(self):
+        """Return (mock_vertexai_module, mock_generative_model_class)."""
+        mock_vertexai = MagicMock()
+        mock_model_instance = MagicMock()
+        mock_response = MagicMock()
+        mock_response.text = "Gemini says: YARN manages resources"
+        mock_response.usage_metadata.prompt_token_count = 8
+        mock_response.usage_metadata.candidates_token_count = 6
+        mock_model_instance.generate_content.return_value = mock_response
+        return mock_vertexai, mock_model_instance, mock_response
+
+    @patch.dict(
+        "sys.modules",
+        {
+            "vertexai": MagicMock(),
+            "vertexai.generative_models": MagicMock(),
+        },
+    )
+    @patch("implementations.llm.vertex_ai.llm_client.vertexai", create=True)
+    def test_complete_returns_text(self, _):
+        """Gemini path returns response.text."""
+        import sys
+
+        mock_gm = sys.modules["vertexai.generative_models"]
+        mock_model_instance = MagicMock()
+        mock_response = MagicMock()
+        mock_response.text = "Gemini answer"
+        mock_response.usage_metadata.prompt_token_count = 5
+        mock_response.usage_metadata.candidates_token_count = 3
+        mock_model_instance.generate_content.return_value = mock_response
+        mock_gm.GenerativeModel.return_value = mock_model_instance
+
+        client = self._make_client()
+        with patch("implementations.llm.vertex_ai.llm_client.vertexai"):
+            with patch(
+                "implementations.llm.vertex_ai.llm_client.VertexAILLMClient._complete_gemini",
+                return_value="Gemini answer",
+            ):
+                result = client.complete(MESSAGES)
+
+        assert result == "Gemini answer"
+
+    def test_permission_denied_raises_llm_auth_error(self):
+        """A 403/PermissionDenied from Gemini maps to LLMAuthError."""
+        client = self._make_client()
+        with patch.object(client, "_complete_gemini", side_effect=LLMAuthError("403")):
+            with pytest.raises(LLMAuthError):
+                client.complete(MESSAGES)
+
+    def test_service_unavailable_raises_llm_unavailable(self):
+        """A ServiceUnavailable from Gemini maps to LLMUnavailableError."""
+        client = self._make_client()
+        with patch.object(client, "_complete_gemini", side_effect=LLMUnavailableError("down")):
+            with pytest.raises(LLMUnavailableError):
+                client.complete(MESSAGES)
+
+    def test_empty_response_raises_llm_response_error(self):
+        """An empty Gemini response maps to LLMResponseError."""
+        client = self._make_client()
+        with patch.object(client, "_complete_gemini", side_effect=LLMResponseError("empty")):
+            with pytest.raises(LLMResponseError):
+                client.complete(MESSAGES)