diff --git a/.dockerignore b/.dockerignore index 3c9ee01..567668f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,10 +1,13 @@ .git .github .planning +.env +.coverage .venv __pycache__ .pytest_cache +.mypy_cache .ruff_cache +*.py[cod] tests docs - diff --git a/.gitattributes b/.gitattributes index 624f233..37203db 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,3 +10,4 @@ Dockerfile text eol=lf *.md text eol=lf *.toml text eol=lf *.ps1 text eol=crlf +tests/contracts/cas-contracts/v0.1.0/*.json -text -diff diff --git a/Dockerfile b/Dockerfile index 88250db..1c9cad8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.12-slim AS runtime ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ PORT=8080 WORKDIR /app @@ -10,12 +11,12 @@ RUN addgroup --system app && adduser --system --ingroup app app COPY pyproject.toml README.md ./ COPY src ./src -RUN pip install --no-cache-dir . +RUN pip install --no-cache-dir --no-compile . USER app EXPOSE 8080 +STOPSIGNAL SIGTERM HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8080/health/live', timeout=2)" CMD ["uvicorn", "cas_reference_product.app:app", "--host", "0.0.0.0", "--port", "8080"] - diff --git a/deployment/cas-platform.interface.yaml b/deployment/cas-platform.interface.yaml index b8456e0..914294d 100644 --- a/deployment/cas-platform.interface.yaml +++ b/deployment/cas-platform.interface.yaml @@ -23,8 +23,3 @@ spec: - FOUNDRY_AGENT_NAME deploymentInjected: - APPLICATIONINSIGHTS_CONNECTION_STRING - outputsConsumed: - - workloadPrincipalId - - applicationInsightsId - - logAnalyticsWorkspaceId - diff --git a/docs/architecture.md b/docs/architecture.md index f3cd3e8..f66d83e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -22,7 +22,12 @@ The Foundry call is isolated behind the `WorkflowAgentService` protocol. This ke ## Deployment Interface -`deployment/cas-platform.interface.yaml` records the contract: Linux AMD64 image, port 8080, internal ingress by default, system-assigned identity, probes, non-secret identifiers, and platform outputs. It does not deploy resources. +`deployment/cas-platform.interface.yaml` records the application contract: Linux AMD64 image, port +8080, internal ingress by default, system-assigned identity, probes, and configuration inputs. It does +not deploy resources. + +The application consumes only the environment values listed in that interface. Platform resource IDs +and principal IDs remain deployment-orchestration outputs and are not application configuration. ## Observability Boundaries @@ -30,4 +35,5 @@ The Foundry call is isolated behind the `WorkflowAgentService` protocol. This ke - `cas.workflow.execute` covers core orchestration. - `foundry.responses.create` covers the external Foundry call. - CAS correlation IDs are attached to workflow spans and canonical events preserve W3C trace context. - +- Broad Azure SDK and outbound HTTP auto-instrumentation is disabled to avoid capturing prompt or + output content. The application records only explicit boundary spans and safe identifiers. diff --git a/docs/operations.md b/docs/operations.md index f2aed64..1f37e99 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -9,13 +9,28 @@ Submit `examples/prompt-envelope.json` to `POST /api/v1/workflows`. Liveness is `/health/live`; readiness is `/health/ready`. +For a hardened local container run: + +```powershell +docker run --rm --read-only --tmpfs /tmp --cap-drop ALL --security-opt no-new-privileges ` + -p 8080:8080 cas-reference-product:local +``` + ## Foundry Mode Set `ENVIRONMENT` to `dev`, `test`, or `prod`; set `WORKFLOW_BACKEND=foundry`; provide the non-secret `FOUNDRY_PROJECT_ENDPOINT` and `FOUNDRY_AGENT_NAME`. The Azure-hosted workload uses its system-assigned managed identity. Do not configure API keys or client secrets. Readiness fails until required Foundry identifiers are present. Foundry connectivity is exercised only by workflow requests, not probes. +When `APPLICATIONINSIGHTS_CONNECTION_STRING` is supplied, telemetry export also authenticates with +the environment credential. Grant the system-assigned identity the minimum Azure Monitor publishing +role required by the deployment. Retry-file storage and broad outbound HTTP/SDK auto-instrumentation +are disabled; explicit spans do not record prompt or output content. + ## Platform Handoff Build a Linux AMD64 image and pass its immutable image reference to the `containerImage` parameter of `cas-platform`. Review `deployment/cas-platform.interface.yaml` before platform changes. This repository intentionally contains no Azure deployment command. +The Docker build context excludes local `.env` files and development artifacts. The application does +not consume platform resource IDs; deployment orchestration retains those outputs for RBAC and +operations workflows. diff --git a/docs/threat-model.md b/docs/threat-model.md index a5e5210..5ac7693 100644 --- a/docs/threat-model.md +++ b/docs/threat-model.md @@ -21,7 +21,7 @@ | Credential disclosure | No keys or tokens in code; system-assigned `ManagedIdentityCredential` in Azure | Operators must maintain least-privilege RBAC | | Legacy API use | Adapter uses project Responses client with `agent_reference`; no Classic Assistants code | SDK behavior must be reviewed during upgrades | | Prompt injection | Workflow treats prompts as untrusted data and exposes no tools in v0.1 | Downstream agent policy remains product-specific | -| Sensitive telemetry | Events contain identifiers and status, not prompt text or agent output | Operators must review SDK and platform log settings | +| Sensitive telemetry | Explicit spans contain safe identifiers only; broad outbound HTTP/SDK auto-instrumentation and retry-file storage are disabled | Operators must review platform log settings | | Unauthorized invocation | External ingress disabled by default in platform interface | Authentication gateway is product-specific and out of scope | | Supply-chain compromise | Pinned CI actions, lint, tests, non-root container | Dependency update review remains required | | Denial of service | Platform scaling bounds and request validation | Product-specific quotas and rate limits are not included | @@ -30,3 +30,5 @@ Grant the Container App system-assigned identity only the minimum Foundry project role needed to invoke the selected agent, scoped to the narrowest resource. Do not assign subscription-wide roles. +When Application Insights export is enabled, also grant the minimum Azure Monitor publishing role +required by the deployment at the narrowest telemetry resource scope. diff --git a/pyproject.toml b/pyproject.toml index f6a9433..c315082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "httpx>=0.28.0", + "jsonschema>=4.23.0", "mypy>=1.14.0", "pytest>=8.3.0", "pytest-cov>=6.0.0", diff --git a/src/cas_reference_product/app.py b/src/cas_reference_product/app.py index 5bc09be..878ae65 100644 --- a/src/cas_reference_product/app.py +++ b/src/cas_reference_product/app.py @@ -7,7 +7,7 @@ from .config import Settings, get_settings from .models import PromptEnvelope, WorkflowResult from .telemetry import configure_telemetry -from .workflow import WorkflowOrchestrator, build_workflow_agent_service +from .workflow import WorkflowAgentServiceError, WorkflowOrchestrator, build_workflow_agent_service def create_app(settings: Settings | None = None) -> FastAPI: @@ -37,7 +37,10 @@ def execute(envelope: PromptEnvelope, request: Request) -> WorkflowResult: raise HTTPException(status_code=503, detail="Workflow backend is not ready") request.state.correlation_id = envelope.correlationId orchestrator = WorkflowOrchestrator(service, app_settings.repository) - return orchestrator.execute(envelope) + try: + return orchestrator.execute(envelope) + except WorkflowAgentServiceError: + raise HTTPException(status_code=502, detail="Workflow backend request failed") from None @app.get("/") def root() -> dict[str, Any]: diff --git a/src/cas_reference_product/config.py b/src/cas_reference_product/config.py index e8f853b..fa75c3e 100644 --- a/src/cas_reference_product/config.py +++ b/src/cas_reference_product/config.py @@ -1,3 +1,4 @@ +import re from functools import lru_cache from typing import Literal @@ -17,11 +18,21 @@ class Settings(BaseSettings): applicationinsights_connection_string: str | None = Field(default=None, repr=False) @property - def ready(self) -> bool: - return self.workflow_backend == "local" or bool( - self.foundry_project_endpoint and self.foundry_agent_name + def foundry_ready(self) -> bool: + return bool( + self.foundry_project_endpoint + and re.fullmatch( + r"https://[A-Za-z0-9.-]+\.services\.ai\.azure\.com/api/projects/[A-Za-z0-9_.-]+/?", + self.foundry_project_endpoint, + ) + and self.foundry_agent_name + and self.foundry_agent_name.strip() ) + @property + def ready(self) -> bool: + return self.workflow_backend == "local" or self.foundry_ready + @lru_cache def get_settings() -> Settings: diff --git a/src/cas_reference_product/models.py b/src/cas_reference_product/models.py index 3d866f9..a8bef22 100644 --- a/src/cas_reference_product/models.py +++ b/src/cas_reference_product/models.py @@ -1,20 +1,42 @@ from datetime import datetime -from typing import Literal +from typing import Annotated, Any, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + + +def reject_explicit_null(data: Any, fields: tuple[str, ...]) -> Any: + if isinstance(data, dict): + null_fields = [field for field in fields if field in data and data[field] is None] + if null_fields: + raise ValueError(f"{', '.join(null_fields)} must be omitted instead of null") + return data class Actor(BaseModel): model_config = ConfigDict(extra="forbid") id: str = Field(min_length=1, max_length=256) type: Literal["human", "agent", "service", "workflow"] - displayName: str | None = Field(default=None, min_length=1, max_length=256) + displayName: str | None = Field( + default=None, min_length=1, max_length=256, exclude_if=lambda value: value is None + ) + + @model_validator(mode="before") + @classmethod + def reject_null_display_name(cls, data: Any) -> Any: + return reject_explicit_null(data, ("displayName",)) class TraceContext(BaseModel): model_config = ConfigDict(extra="forbid") traceparent: str = Field(pattern=r"^[\da-f]{2}-[\da-f]{32}-[\da-f]{16}-[\da-f]{2}$") - tracestate: str | None = Field(default=None, max_length=512) + tracestate: str | None = Field( + default=None, max_length=512, exclude_if=lambda value: value is None + ) + + @model_validator(mode="before") + @classmethod + def reject_null_tracestate(cls, data: Any) -> Any: + return reject_explicit_null(data, ("tracestate",)) class LifecycleMetadata(BaseModel): @@ -33,7 +55,16 @@ class PromptEnvelope(LifecycleMetadata): kind: Literal["PromptEnvelope"] = "PromptEnvelope" intent: str = Field(min_length=1, max_length=256) prompt: str = Field(min_length=1, max_length=50_000) - constraints: list[str] = Field(default_factory=list) + constraints: list[Annotated[str, Field(min_length=1, max_length=1_000)]] = Field( + default_factory=list + ) + + @field_validator("constraints") + @classmethod + def constraints_must_be_unique(cls, constraints: list[str]) -> list[str]: + if len(constraints) != len(set(constraints)): + raise ValueError("constraints must contain unique values") + return constraints class RunEvent(LifecycleMetadata): @@ -42,11 +73,17 @@ class RunEvent(LifecycleMetadata): eventType: str = Field(min_length=1, max_length=128) sequence: int = Field(ge=0) status: Literal["queued", "running", "succeeded", "failed", "cancelled"] - message: str | None = Field(default=None, max_length=5_000) + message: str | None = Field( + default=None, max_length=5_000, exclude_if=lambda value: value is None + ) + + @model_validator(mode="before") + @classmethod + def reject_null_message(cls, data: Any) -> Any: + return reject_explicit_null(data, ("message",)) class WorkflowResult(BaseModel): runId: str output: str events: list[RunEvent] - diff --git a/src/cas_reference_product/telemetry.py b/src/cas_reference_product/telemetry.py index 16033ac..43e4b68 100644 --- a/src/cas_reference_product/telemetry.py +++ b/src/cas_reference_product/telemetry.py @@ -1,6 +1,7 @@ from opentelemetry import trace from .config import Settings +from .identity import build_credential def configure_telemetry(settings: Settings) -> None: @@ -9,13 +10,20 @@ def configure_telemetry(settings: Settings) -> None: configure_azure_monitor( connection_string=settings.applicationinsights_connection_string, + credential=build_credential(settings.environment), + disable_offline_storage=True, + instrumentation_options={ + "azure_sdk": {"enabled": False}, + "requests": {"enabled": False}, + "urllib": {"enabled": False}, + "urllib3": {"enabled": False}, + }, service_name=settings.app_name, ) -def current_traceparent() -> str: +def current_traceparent(fallback: str) -> str: context = trace.get_current_span().get_span_context() if context.is_valid: return f"00-{context.trace_id:032x}-{context.span_id:016x}-01" - return "00-00000000000000000000000000000001-0000000000000001-00" - + return fallback diff --git a/src/cas_reference_product/workflow.py b/src/cas_reference_product/workflow.py index 798db01..04e751b 100644 --- a/src/cas_reference_product/workflow.py +++ b/src/cas_reference_product/workflow.py @@ -20,6 +20,10 @@ class WorkflowAgentService(Protocol): def run(self, envelope: PromptEnvelope) -> str: ... +class WorkflowAgentServiceError(RuntimeError): + """Stable application error raised when an external workflow backend fails.""" + + class LocalWorkflowAgentService: def run(self, envelope: PromptEnvelope) -> str: return ( @@ -32,20 +36,30 @@ class FoundryWorkflowAgentService: """Invoke a Foundry Next Gen agent reference through the project Responses client.""" def __init__(self, settings: Settings) -> None: - if not settings.foundry_project_endpoint or not settings.foundry_agent_name: - raise ValueError("Foundry backend requires project endpoint and agent name") - self._agent_name = settings.foundry_agent_name + endpoint = settings.foundry_project_endpoint + agent_name = settings.foundry_agent_name + if not settings.foundry_ready or endpoint is None or agent_name is None: + raise ValueError("Foundry backend requires a valid project endpoint and agent name") + self._agent_name = agent_name self._client = AIProjectClient( - endpoint=settings.foundry_project_endpoint, + endpoint=endpoint, credential=build_credential(settings.environment), ).get_openai_client() def run(self, envelope: PromptEnvelope) -> str: with tracer.start_as_current_span("foundry.responses.create"): - response = self._client.responses.create( - input=envelope.prompt, - extra_body={"agent": {"name": self._agent_name, "type": "agent_reference"}}, - ) + try: + response = self._client.responses.create( + input=envelope.prompt, + extra_body={ + "agent_reference": { + "name": self._agent_name, + "type": "agent_reference", + } + }, + ) + except Exception: + raise WorkflowAgentServiceError("Foundry workflow invocation failed") from None return response.output_text @@ -92,6 +106,9 @@ def _event( status: RunStatus, message: str, ) -> RunEvent: + trace_context = {"traceparent": current_traceparent(envelope.traceContext.traceparent)} + if envelope.traceContext.tracestate is not None: + trace_context["tracestate"] = envelope.traceContext.tracestate return RunEvent( correlationId=envelope.correlationId, promptId=envelope.promptId, @@ -99,7 +116,7 @@ def _event( repo=self._repository, actor=Actor(id="cas-reference-workflow", type="workflow"), timestamp=self._clock(), - traceContext=TraceContext(traceparent=current_traceparent()), + traceContext=TraceContext.model_validate(trace_context), eventType=event_type, sequence=sequence, status=status, diff --git a/tests/contracts/cas-contracts/v0.1.0/artifact-manifest.schema.json b/tests/contracts/cas-contracts/v0.1.0/artifact-manifest.schema.json new file mode 100644 index 0000000..c399640 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/artifact-manifest.schema.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/artifact-manifest.schema.json", + "title": "ArtifactManifest", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "artifacts" + ], + "properties": { + "kind": { + "const": "ArtifactManifest" + }, + "artifacts": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "common.schema.json#/$defs/evidence" + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/common.schema.json b/tests/contracts/cas-contracts/v0.1.0/common.schema.json new file mode 100644 index 0000000..0eec265 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/common.schema.json @@ -0,0 +1,68 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/common.schema.json", + "title": "CAS Common Definitions", + "$defs": { + "actor": { + "type": "object", + "additionalProperties": false, + "required": ["id", "type"], + "properties": { + "id": { "type": "string", "minLength": 1, "maxLength": 256 }, + "type": { + "type": "string", + "enum": ["human", "agent", "service", "workflow"] + }, + "displayName": { "type": "string", "minLength": 1, "maxLength": 256 } + } + }, + "traceContext": { + "type": "object", + "additionalProperties": false, + "required": ["traceparent"], + "properties": { + "traceparent": { + "type": "string", + "pattern": "^[\\da-f]{2}-[\\da-f]{32}-[\\da-f]{16}-[\\da-f]{2}$" + }, + "tracestate": { "type": "string", "maxLength": 512 } + } + }, + "lifecycleMetadata": { + "type": "object", + "required": [ + "correlationId", + "promptId", + "runId", + "repo", + "actor", + "timestamp", + "schemaVersion", + "traceContext" + ], + "properties": { + "correlationId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "promptId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "runId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "repo": { + "type": "string", + "pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$" + }, + "actor": { "$ref": "#/$defs/actor" }, + "timestamp": { "type": "string", "format": "date-time" }, + "schemaVersion": { "const": "0.1.0" }, + "traceContext": { "$ref": "#/$defs/traceContext" } + } + }, + "evidence": { + "type": "object", + "additionalProperties": false, + "required": ["kind", "uri"], + "properties": { + "kind": { "type": "string", "minLength": 1, "maxLength": 64 }, + "uri": { "type": "string", "format": "uri" }, + "sha256": { "type": "string", "pattern": "^[\\da-f]{64}$" } + } + } + } +} diff --git a/tests/contracts/cas-contracts/v0.1.0/evaluation-result.schema.json b/tests/contracts/cas-contracts/v0.1.0/evaluation-result.schema.json new file mode 100644 index 0000000..5d49d77 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/evaluation-result.schema.json @@ -0,0 +1,45 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/evaluation-result.schema.json", + "title": "EvaluationResult", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "evaluator", + "outcome", + "metrics" + ], + "properties": { + "kind": { + "const": "EvaluationResult" + }, + "evaluator": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "outcome": { + "enum": [ + "passed", + "failed", + "inconclusive" + ] + }, + "metrics": { + "type": "object", + "minProperties": 1, + "additionalProperties": { + "type": "number" + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/manifest.json b/tests/contracts/cas-contracts/v0.1.0/manifest.json new file mode 100644 index 0000000..58b90c2 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/manifest.json @@ -0,0 +1,45 @@ +{ + "version": "0.1.0", + "schemas": [ + { + "id": "https://schemas.coding-autopilot.dev/v0.1/artifact-manifest.schema.json", + "path": "artifact-manifest.schema.json", + "sha256": "6c2192fb6ca79843361695f3dccd5ebf1dda8ce320c974db2630b0a2b78705bd" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/common.schema.json", + "path": "common.schema.json", + "sha256": "c7ce72a6f5da8394e48f2421820588a8142546962e05152997bd1e6ced994928" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/evaluation-result.schema.json", + "path": "evaluation-result.schema.json", + "sha256": "12e8019f858dc0cda80ac8994ca1251a9a229a8642b05706227bd9c995c5799a" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/policy-decision.schema.json", + "path": "policy-decision.schema.json", + "sha256": "9cf259d405664560b30b7afd25d08d888337fd9a81a6a388ceeb36cbf7edcc33" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/prompt-envelope.schema.json", + "path": "prompt-envelope.schema.json", + "sha256": "58a991862031f40c2ffcc073743776d05206684534fac2c20c67c274fbf05c84" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/run-event.schema.json", + "path": "run-event.schema.json", + "sha256": "1bcaee2ff546439ea81d64d57dcb51cd9f57dd790fe300fd90e251823e27a36f" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/verification-result.schema.json", + "path": "verification-result.schema.json", + "sha256": "aeff3d41eeb99b3017b460c2a2a8d07b4e1c828830b72853d5f41d93d6abe012" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/work-request.schema.json", + "path": "work-request.schema.json", + "sha256": "543e1dd2313cff2a1ebeb1470db3085b89c3933694231eb5b4798096f147e4a7" + } + ] +} diff --git a/tests/contracts/cas-contracts/v0.1.0/policy-decision.schema.json b/tests/contracts/cas-contracts/v0.1.0/policy-decision.schema.json new file mode 100644 index 0000000..38deb32 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/policy-decision.schema.json @@ -0,0 +1,47 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/policy-decision.schema.json", + "title": "PolicyDecision", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "decision", + "policyVersion", + "reasons" + ], + "properties": { + "kind": { + "const": "PolicyDecision" + }, + "decision": { + "enum": [ + "allow", + "deny", + "require-approval" + ] + }, + "policyVersion": { + "type": "string", + "minLength": 1, + "maxLength": 64 + }, + "reasons": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1, + "maxLength": 1000 + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/prompt-envelope.schema.json b/tests/contracts/cas-contracts/v0.1.0/prompt-envelope.schema.json new file mode 100644 index 0000000..4f3f395 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/prompt-envelope.schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/prompt-envelope.schema.json", + "title": "PromptEnvelope", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "intent", + "prompt" + ], + "properties": { + "kind": { + "const": "PromptEnvelope" + }, + "intent": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "prompt": { + "type": "string", + "minLength": 1, + "maxLength": 50000 + }, + "constraints": { + "type": "array", + "items": { + "type": "string", + "minLength": 1, + "maxLength": 1000 + }, + "uniqueItems": true + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/run-event.schema.json b/tests/contracts/cas-contracts/v0.1.0/run-event.schema.json new file mode 100644 index 0000000..204c518 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/run-event.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/run-event.schema.json", + "title": "RunEvent", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "eventType", + "sequence", + "status" + ], + "properties": { + "kind": { + "const": "RunEvent" + }, + "eventType": { + "type": "string", + "minLength": 1, + "maxLength": 128 + }, + "sequence": { + "type": "integer", + "minimum": 0 + }, + "status": { + "enum": [ + "queued", + "running", + "succeeded", + "failed", + "cancelled" + ] + }, + "message": { + "type": "string", + "maxLength": 5000 + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/verification-result.schema.json b/tests/contracts/cas-contracts/v0.1.0/verification-result.schema.json new file mode 100644 index 0000000..178df29 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/verification-result.schema.json @@ -0,0 +1,62 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/verification-result.schema.json", + "title": "VerificationResult", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "outcome", + "checks" + ], + "properties": { + "kind": { + "const": "VerificationResult" + }, + "outcome": { + "enum": [ + "passed", + "failed", + "inconclusive" + ] + }, + "checks": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "outcome" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "outcome": { + "enum": [ + "passed", + "failed", + "skipped" + ] + }, + "evidenceUri": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/contracts/cas-contracts/v0.1.0/work-request.schema.json b/tests/contracts/cas-contracts/v0.1.0/work-request.schema.json new file mode 100644 index 0000000..16d9da3 --- /dev/null +++ b/tests/contracts/cas-contracts/v0.1.0/work-request.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/work-request.schema.json", + "title": "WorkRequest", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "workType", + "objective", + "targetRef", + "riskLevel" + ], + "properties": { + "kind": { + "const": "WorkRequest" + }, + "workType": { + "enum": [ + "analyze", + "change", + "verify", + "repair", + "deploy" + ] + }, + "objective": { + "type": "string", + "minLength": 1, + "maxLength": 5000 + }, + "targetRef": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "riskLevel": { + "enum": [ + "low", + "medium", + "high", + "critical" + ] + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/tests/test_api.py b/tests/test_api.py index 60f1fd3..4be7dbd 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,7 +1,15 @@ +from unittest.mock import patch + from fastapi.testclient import TestClient from cas_reference_product.app import create_app from cas_reference_product.config import Settings +from cas_reference_product.workflow import WorkflowAgentServiceError + + +class FailingExternalService: + def run(self, envelope) -> str: + raise WorkflowAgentServiceError("sensitive provider detail") def test_workflow_api_emits_canonical_events(envelope) -> None: @@ -31,3 +39,32 @@ def test_incomplete_foundry_configuration_is_not_ready() -> None: assert client.get("/health/live").status_code == 200 assert client.get("/health/ready").status_code == 503 assert client.post("/api/v1/workflows", json={}).status_code == 422 + + +def test_invalid_foundry_endpoint_is_not_ready() -> None: + client = TestClient( + create_app( + Settings( + workflow_backend="foundry", + foundry_project_endpoint="https://example.invalid/project", + foundry_agent_name="cas-reference-agent", + ) + ) + ) + + assert client.get("/health/live").status_code == 200 + assert client.get("/health/ready").status_code == 503 + + +def test_workflow_api_sanitizes_external_service_failures(envelope) -> None: + with patch( + "cas_reference_product.app.build_workflow_agent_service", + return_value=FailingExternalService(), + ): + client = TestClient(create_app(Settings())) + + response = client.post("/api/v1/workflows", json=envelope.model_dump(mode="json")) + + assert response.status_code == 502 + assert response.json() == {"detail": "Workflow backend request failed"} + assert "sensitive" not in response.text diff --git a/tests/test_contract_registry.py b/tests/test_contract_registry.py new file mode 100644 index 0000000..df6672d --- /dev/null +++ b/tests/test_contract_registry.py @@ -0,0 +1,48 @@ +import hashlib +import json +from pathlib import Path +from typing import Any + +from jsonschema import Draft202012Validator +from referencing import Registry, Resource + +from cas_reference_product.workflow import LocalWorkflowAgentService, WorkflowOrchestrator + +CONTRACT_ROOT = Path(__file__).parent / "contracts" / "cas-contracts" / "v0.1.0" + + +def load_json(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def contract_registry() -> Registry[Any]: + resources = [] + for path in CONTRACT_ROOT.glob("*.schema.json"): + schema = load_json(path) + resources.append((schema["$id"], Resource.from_contents(schema))) + return Registry().with_resources(resources) + + +def assert_valid(schema_name: str, instance: dict[str, Any]) -> None: + schema = load_json(CONTRACT_ROOT / schema_name) + Draft202012Validator(schema, registry=contract_registry()).validate(instance) + + +def test_vendored_contract_release_matches_manifest_hashes() -> None: + manifest = load_json(CONTRACT_ROOT / "manifest.json") + + assert manifest["version"] == "0.1.0" + for entry in manifest["schemas"]: + content = (CONTRACT_ROOT / entry["path"]).read_bytes() + assert hashlib.sha256(content).hexdigest() == entry["sha256"] + + +def test_prompt_envelope_serialization_conforms_to_v010_registry(envelope) -> None: + assert_valid("prompt-envelope.schema.json", envelope.model_dump(mode="json")) + + +def test_run_event_serialization_conforms_to_v010_registry(envelope) -> None: + result = WorkflowOrchestrator(LocalWorkflowAgentService(), envelope.repo).execute(envelope) + + for event in result.events: + assert_valid("run-event.schema.json", event.model_dump(mode="json")) diff --git a/tests/test_models.py b/tests/test_models.py index b12671b..a9b4179 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from cas_reference_product.models import PromptEnvelope +from cas_reference_product.models import Actor, PromptEnvelope, RunEvent, TraceContext def test_prompt_envelope_rejects_extra_properties(envelope) -> None: @@ -19,3 +19,54 @@ def test_prompt_envelope_matches_cas_contract_metadata(envelope) -> None: assert payload["schemaVersion"] == "0.1.0" assert payload["traceContext"]["traceparent"].startswith("00-") + +@pytest.mark.parametrize( + "constraints", + [ + [""], + ["x" * 1_001], + ["No secrets", "No secrets"], + ], +) +def test_prompt_envelope_enforces_cas_contract_constraints(envelope, constraints) -> None: + payload = envelope.model_dump() + payload["constraints"] = constraints + + with pytest.raises(ValidationError): + PromptEnvelope.model_validate(payload) + + +@pytest.mark.parametrize( + ("model", "payload"), + [ + (Actor, {"id": "developer", "type": "human", "displayName": None}), + ( + TraceContext, + { + "traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", + "tracestate": None, + }, + ), + ( + RunEvent, + { + "correlationId": "corr-001", + "promptId": "prompt-001", + "runId": "run-001", + "repo": "Coding-Autopilot-System/cas-reference-product", + "actor": {"id": "workflow", "type": "workflow"}, + "timestamp": "2026-06-11T00:00:00Z", + "traceContext": { + "traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01" + }, + "eventType": "workflow.started", + "sequence": 0, + "status": "running", + "message": None, + }, + ), + ], +) +def test_contract_models_reject_explicit_null_optional_fields(model, payload) -> None: + with pytest.raises(ValidationError): + model.model_validate(payload) diff --git a/tests/test_runtime_contracts.py b/tests/test_runtime_contracts.py new file mode 100644 index 0000000..f2606f6 --- /dev/null +++ b/tests/test_runtime_contracts.py @@ -0,0 +1,27 @@ +from pathlib import Path + +ROOT = Path(__file__).parents[1] + + +def test_docker_build_context_excludes_local_configuration() -> None: + ignored = (ROOT / ".dockerignore").read_text(encoding="utf-8").splitlines() + + assert ".env" in ignored + assert ".coverage" in ignored + assert ".venv" in ignored + + +def test_container_runs_non_root_with_graceful_stop() -> None: + dockerfile = (ROOT / "Dockerfile").read_text(encoding="utf-8") + + assert "\nUSER app\n" in dockerfile + assert "\nSTOPSIGNAL SIGTERM\n" in dockerfile + assert "--no-cache-dir --no-compile" in dockerfile + + +def test_platform_interface_lists_only_application_inputs() -> None: + interface = (ROOT / "deployment" / "cas-platform.interface.yaml").read_text(encoding="utf-8") + + assert "outputsConsumed" not in interface + assert "APPLICATIONINSIGHTS_CONNECTION_STRING" in interface + assert "targetPort: 8080" in interface diff --git a/tests/test_service_factory.py b/tests/test_service_factory.py index 122a521..0c3c664 100644 --- a/tests/test_service_factory.py +++ b/tests/test_service_factory.py @@ -1,9 +1,12 @@ from unittest.mock import patch +import pytest + from cas_reference_product.config import Settings from cas_reference_product.workflow import ( FoundryWorkflowAgentService, LocalWorkflowAgentService, + WorkflowAgentServiceError, build_workflow_agent_service, ) @@ -24,6 +27,18 @@ def test_factory_builds_foundry_service_when_configured() -> None: service.assert_called_once_with(settings) +def test_foundry_service_rejects_invalid_project_endpoint() -> None: + settings = Settings( + environment="prod", + workflow_backend="foundry", + foundry_project_endpoint="https://example.invalid/project", + foundry_agent_name="cas-reference-agent", + ) + + with pytest.raises(ValueError, match="valid project endpoint"): + FoundryWorkflowAgentService(settings) + + def test_foundry_service_uses_next_gen_agent_reference(envelope) -> None: settings = Settings( environment="prod", @@ -46,6 +61,31 @@ def test_foundry_service_uses_next_gen_agent_reference(envelope) -> None: ) responses.create.assert_called_once_with( input=envelope.prompt, - extra_body={"agent": {"name": "cas-reference-agent", "type": "agent_reference"}}, + extra_body={ + "agent_reference": { + "name": "cas-reference-agent", + "type": "agent_reference", + } + }, ) assert result == "Foundry result" + + +def test_foundry_service_sanitizes_sdk_failure(envelope) -> None: + settings = Settings( + environment="prod", + workflow_backend="foundry", + foundry_project_endpoint="https://example.services.ai.azure.com/api/projects/example", + foundry_agent_name="cas-reference-agent", + ) + with ( + patch("cas_reference_product.workflow.build_credential"), + patch("cas_reference_product.workflow.AIProjectClient") as project_client, + ): + project_client.return_value.get_openai_client.return_value.responses.create.side_effect = ( + RuntimeError("sensitive provider detail") + ) + service = FoundryWorkflowAgentService(settings) + + with pytest.raises(WorkflowAgentServiceError, match="Foundry workflow invocation failed"): + service.run(envelope) diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index 804ae01..071c19f 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -5,15 +5,46 @@ def test_telemetry_is_noop_without_application_insights() -> None: - configure_telemetry(Settings()) + with patch("cas_reference_product.telemetry.build_credential") as credential: + configure_telemetry(Settings()) + credential.assert_not_called() -def test_invalid_span_produces_valid_unsampled_traceparent() -> None: +def test_telemetry_uses_identity_and_privacy_hardening() -> None: + settings = Settings( + environment="prod", + applicationinsights_connection_string=( + "InstrumentationKey=00000000-0000-0000-0000-000000000000" + ), + ) + with ( + patch("cas_reference_product.telemetry.build_credential") as credential, + patch("azure.monitor.opentelemetry.configure_azure_monitor") as configure, + ): + configure_telemetry(settings) + + credential.assert_called_once_with("prod") + configure.assert_called_once_with( + connection_string=settings.applicationinsights_connection_string, + credential=credential.return_value, + disable_offline_storage=True, + instrumentation_options={ + "azure_sdk": {"enabled": False}, + "requests": {"enabled": False}, + "urllib": {"enabled": False}, + "urllib3": {"enabled": False}, + }, + service_name=settings.app_name, + ) + + +def test_invalid_span_preserves_incoming_traceparent() -> None: + incoming = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01" with patch("cas_reference_product.telemetry.trace.get_current_span") as current: span = MagicMock() span.get_span_context.return_value.is_valid = False current.return_value = span - value = current_traceparent() + value = current_traceparent(incoming) - assert value == "00-00000000000000000000000000000001-0000000000000001-00" + assert value == incoming diff --git a/tests/test_workflow.py b/tests/test_workflow.py index dc4b34b..df0bb91 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -27,9 +27,9 @@ def test_orchestrator_returns_traceable_events(envelope) -> None: "workflow.completed", ] assert all(event.timestamp == fixed for event in result.events) + assert all(event.traceContext == envelope.traceContext for event in result.events) def test_orchestrator_propagates_failure(envelope) -> None: with pytest.raises(RuntimeError, match="expected"): WorkflowOrchestrator(FailingService(), envelope.repo).execute(envelope) -