From e6d1b98d2d7ac9476ff5d95140d2cf7e2d1c191a Mon Sep 17 00:00:00 2001 From: klickd agent Date: Tue, 2 Jun 2026 11:14:16 +0000 Subject: [PATCH 1/8] docs: document x.klickd supply chain protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the x.klickd skill-pack build process into a single NON-NORMATIVE spec + operator quickstart. No release, no automation claim beyond the per-stage tool/manual/planned labels. - docs/rfcs/chimera/SUPPLY_CHAIN.md: 18-stage pipeline, version lineage, rollback/deprecation/approval-revocation, determinism + reproducibility, anti-mirage protocol, audit-trail index. Explicit claim boundary (no universal standard, no automatic GDPR/EU-AI-Act compliance, no proven benchmark superiority; 70-80% is a design target, not a guarantee). - docs/rfcs/chimera/packs/QUICKSTART.md: human-or-agent build/audit loop, multi-agent role split, shipped verification commands, truth-boundary warning (artifact_loaded + sha256_matches_manifest). - Pointer links added in RFC-009 §12, chimera/README §6, packs/README §7. Docs-only. No schema/SDK/package/CI change. No existing claim altered. 
Co-Authored-By: Claude Opus 4.7 --- docs/rfcs/RFC-009-chimera-v4.1.md | 2 + docs/rfcs/chimera/README.md | 2 + docs/rfcs/chimera/SUPPLY_CHAIN.md | 190 ++++++++++++++++++++++++++ docs/rfcs/chimera/packs/QUICKSTART.md | 91 ++++++++++++ docs/rfcs/chimera/packs/README.md | 2 + 5 files changed, 287 insertions(+) create mode 100644 docs/rfcs/chimera/SUPPLY_CHAIN.md create mode 100644 docs/rfcs/chimera/packs/QUICKSTART.md diff --git a/docs/rfcs/RFC-009-chimera-v4.1.md b/docs/rfcs/RFC-009-chimera-v4.1.md index d3a3297..e8cb8f2 100644 --- a/docs/rfcs/RFC-009-chimera-v4.1.md +++ b/docs/rfcs/RFC-009-chimera-v4.1.md @@ -363,6 +363,8 @@ Promotion to `Accepted` requires (at minimum) one P0 pack to demonstrate §8 is **Companion docs:** +- [`docs/rfcs/chimera/SUPPLY_CHAIN.md`](./chimera/SUPPLY_CHAIN.md) — **NON-NORMATIVE** supply-chain build-process spec: the 18-stage pipeline (each stage labelled `tool` / `manual` / `planned`), version lineage, rollback / deprecation / approval-revocation, determinism + reproducibility, anti-mirage protocol, and audit-trail index. Specifies the *process* that produces and gates the §8 artefact. +- [`docs/rfcs/chimera/packs/QUICKSTART.md`](./chimera/packs/QUICKSTART.md) — **NON-NORMATIVE** operator quickstart (human or agent) for the build-and-audit loop, multi-agent role split, and shipped verification commands. - [`docs/rfcs/chimera/README.md`](./chimera/README.md) — pack scope table and validation criteria summary. - [`docs/rfcs/chimera/packs/README.md`](./chimera/packs/README.md) — concrete pack index, `/klickdskill` later-notes, no-fake-catalog reminder. - [`docs/rfcs/chimera/frameworks/README.md`](./chimera/frameworks/README.md) — canonical framework registry (ESCO v1.1.1, DigComp 2.2, LifeComp 2020, EQF 2017, CEFR 2020, WEF, O\*NET, NICE, ENISA, CIS, SFIA) with stable URLs / IRI prefixes / distribution URLs / SHA-256 placeholders, plus the offline SKOS/JSON-LD bundle shape. 
diff --git a/docs/rfcs/chimera/README.md b/docs/rfcs/chimera/README.md index fcd52c5..1c02438 100644 --- a/docs/rfcs/chimera/README.md +++ b/docs/rfcs/chimera/README.md @@ -124,6 +124,8 @@ See [`packs/README.md`](./packs/README.md) for the full pack index, no-fake-cata ## 6. Pointers +- Supply-chain build-process spec (NON-NORMATIVE): [`SUPPLY_CHAIN.md`](./SUPPLY_CHAIN.md) — 18-stage pipeline, lineage, rollback/deprecation, determinism, anti-mirage protocol, audit index. +- Operator quickstart (human or agent, NON-NORMATIVE): [`packs/QUICKSTART.md`](./packs/QUICKSTART.md) - Full RFC: [`docs/rfcs/RFC-009-chimera-v4.1.md`](../RFC-009-chimera-v4.1.md) - Concrete pack scaffolds: [`packs/`](./packs/) (index: [`packs/README.md`](./packs/README.md)) - First concrete pack: [`packs/student.md`](./packs/student.md) — `x.klickd/student` diff --git a/docs/rfcs/chimera/SUPPLY_CHAIN.md b/docs/rfcs/chimera/SUPPLY_CHAIN.md new file mode 100644 index 0000000..205749c --- /dev/null +++ b/docs/rfcs/chimera/SUPPLY_CHAIN.md @@ -0,0 +1,190 @@ +# x.klickd supply chain — skill-pack build process spec + +> **Status:** Draft · **NON-NORMATIVE** · companion to [`RFC-009-chimera-v4.1.md`](../RFC-009-chimera-v4.1.md). +> **Triggers no release.** No tag, no `latest` on npm/PyPI, no Zenodo DOI, no IANA action, no schema change, no automation claim beyond the per-stage `tool` / `manual` / `planned` labels in §3. +> **Internal naming.** This document uses the public name **`x.klickd`** throughout. The internal track codename used in sibling directory/file paths is an internal identifier only and MUST NOT be propagated to public surfaces (README, `docs/public/*`, package metadata). + +This document describes the **process** by which an `x.klickd` candidate `carrier_pack` is built, audited, and (if it passes) promoted. RFC-009 specifies the *artefact* (architecture, the ten validation criteria in §8/§8.1, the v4.1-native shape). 
This spec describes the *pipeline that produces and gates that artefact*. The two are companions: RFC-009 says what a pack must be; this says how a pack is made and checked. + +--- + +## 0. Scope and claim boundary + +This is a process specification, not a runnable end-to-end system today. Read the per-stage labels in §3 literally. + +What this document does **not** claim: + +- **No universal standard.** `x.klickd` is a format and a process, not an industry standard. No "universal standard" claim is made or implied. +- **No automatic compliance.** Nothing here delivers automatic GDPR / EU AI Act / sectoral compliance. License and boundary checks (§3 stages 11, 14) are *engineering checks*, not legal compliance attestations. +- **No proven benchmark superiority.** §9 describes benchmark *intent*. No "benchmark superiority proven" claim is made without external, reproducible evidence. +- **No loaded-skill claim without proof.** A pack is "loaded" / "used" only when `artifact_loaded = true` **and** `sha256_matches_manifest = true` per [`skill-loader-protocol.md`](../../integrations/skill-loader-protocol.md). A catalog entry, a stub, a routing placeholder, or a marketing page is never a loaded skill. + +The **70–80% automation** figure in the brief is a **design target**, not a guarantee and not a measured result. As of this draft, only a subset of the pipeline is backed by shipped tooling (see §3). The honest current state is: the build *process* is specified end-to-end; the build *runner* is partial. + +--- + +## 1. Build request (input) + +The pipeline input is a normalised **build request**: configuration only. + +- **Config-only.** Names the target pack id (`x.klickd/<pack-name>`), the framework backbone to anchor against (RFC-009 §5.7), the track (P0 / P1), and the requested tier (Lite / Pro). +- **No PII, no secrets.** Build requests and every artefact they produce are publisher-owned. No user memory, sessions, consent, or PII enters the pipeline (RFC-009 §8 criterion 4). 
+- **Deterministic identity.** The build request is hashed to an `input_hash`. The same `input_hash` is expected to produce the same `output_hash` (§7). + +**Outputs of a build are audit artefacts**, not just a pack file: the candidate pack, a diff report (§5), a determinism record (§7), and an audit-trail entry (§10). + +--- + +## 2. Human / agent configuration and the gated premium pass + +The pipeline is configurable by a **human or an agent** through the build request. The configuration surface is the *only* place an operator shapes the output before audit. + +The **premium pass** — human or agent intervention to lift a candidate to reference quality — is restricted to the **last layer only** and is **gated**: + +- It runs **only after** the candidate has been built and audited (§8). +- It runs **only if explicitly requested**. +- It edits the candidate's final layer; it does not silently re-run earlier stages or compensate for a failed build. + +This gating is what keeps the benchmark (§9) honest: a premium pass that ran before the audit would mask how much the pipeline actually produced. + +--- + +## 3. The 18-stage pipeline + +Each stage is labelled with its current backing: + +- **`tool`** — backed by shipped, runnable tooling in this repo. +- **`manual`** — performed by a human/agent reviewer; no automation claimed. +- **`planned`** — specified here but not yet backed by a shipped artefact. **Not automated today.** + +| # | Stage | Backing | Notes | +|---|---|---|---| +| 1 | Build request normalisation | `manual` | Config-only; produces `input_hash` (§1). | +| 2 | Source intake | `manual` | Pull framework backbone refs (ESCO / WEF / O\*NET / DigComp / EQF / CEFR …) from the registry. | +| 3 | Domain mapping | `tool` (partial) | Candidate↔framework mapping validated by [`scripts/validate_v4_1_candidate_mapping.py`](../../../scripts/validate_v4_1_candidate_mapping.py). 
| +| 4 | Foundation / transversal grafting | `manual` | Graft the foundation + transversal competency floor (RFC-009 architecture). | +| 5 | Context-graph generation | `planned` | Required graph shape in §3.1; generator not yet shipped. | +| 6 | Governance & security injection | `manual` | Inject gate defaults + veto posture (RFC-009 §8 criteria 2, 8). | +| 7 | Evidence binding | `manual` | Every claim declares a grounding rule (RFC-009 §8 criterion 3). | +| 8 | Candidate skill generation | `planned` | Assemble the candidate pack artefact; assembler not yet shipped. | +| 9 | Diff report | `planned` | Logical diff vs prior version (§5); generator not yet shipped. | +| 10 | Threat model | `manual` | Per-pack; sized to risk (high-risk packs get a fuller model — §6). | +| 11 | License check | `manual` | Reject framework/source content with an incompatible licence. Engineering check, not legal attestation. | +| 12 | Source freshness check | `manual` | Flag references that have expired or changed upstream. | +| 13 | PII / secrets scan | `planned` | Scan candidate outputs **and** build logs; scanner not yet shipped. | +| 14 | Private / public boundary check | `manual` | No internal codename / confidential structure in any public-facing field. | +| 15 | Determinism & reproducibility checks | `tool` (partial) | Pack-level hash verification shipped: [`scripts/verify_xklickd_skill_packs.py`](../../../scripts/verify_xklickd_skill_packs.py). Build-level reproducibility record is `planned` (§7). | +| 16 | Human / agent premium pass | `manual`, gated | Last layer only, post-audit, if requested (§2). | +| 17 | Release candidate | `manual` | Promote `reviewed → release_candidate` (§4). Triggers no public release. | +| 18 | Stable promotion or rejection | `manual` | Promote `release_candidate → stable` or reject. Gated by RFC-009 §8 (all ten criteria) + the acceptance checklist. | + +> No stage above is claimed as automated unless its row says `tool`. 
Stages marked `planned` are specification only and must not be presented as working automation. + +### 3.1 Required context-graph shape + +Every premium candidate must carry a context graph with at least: + +- **Nodes:** `memory`, `competency`, `skill`, `evidence`, `policy`, `action`, `agent`, `risk`. +- **Edges:** `supports`, `contradicts`, `supersedes`, `requires_veto`, `activates_skill`, `depends_on`, `derived_from`, `decays_to`, `promotes_to`, `handoff_to`, `blocks_unless`, `enforces`. +- **Traversal flow:** `task → intent → competencies → skills → memory → evidence → policy/veto → minimal context → response → audit`. + +The graph is the artefact's reasoning spine; §5's diff operates over it. + +--- + +## 4. Version lineage + +A candidate moves through an explicit lineage: + +```text +candidate → reviewed → release_candidate → stable +``` + +Off-path states: + +- **`deprecated`** — a previously-stable pack marked obsolete (§6). Stays resolvable for lineage, not offered as current. +- **`rolled_back`** — a promoted candidate later judged bad and withdrawn (§6). + +Every transition is recorded in the audit-trail index (§10). No transition implies a public release, tag, DOI, or catalog exposure — those remain separately gated (RFC-009 §7). + +--- + +## 5. Logical diff report + +The diff report compares the previous version to the new candidate **over the context graph and the validation surface**, not just text: + +- Added / removed / changed competencies, gates, evidence rules, and graph edges. +- Any change to gate defaults or human-authority posture is highlighted. + +> **Gate-lowering is a hard fail.** If a candidate lowers a user's effective gate or weakens the human-authority layer relative to the prior version, the diff report fails the candidate. This mirrors RFC-009 §8 criterion 8. + +--- + +## 6. 
Rollback, deprecation, approval revocation, threat-model sizing + +- **Rollback protocol.** Withdraw a promoted candidate later judged bad: set state `rolled_back`, record reason + prior `output_hash` in the audit index, and restore the last-known-good `stable` as current. No silent overwrite. +- **Deprecation protocol.** Mark a pack obsolete: state `deprecated`, with a successor pointer when one exists. Deprecated packs remain hash-resolvable for lineage. +- **Approval revocation.** A prior "go" can be revoked: record the revocation against the approval id in the audit index; the candidate drops back to `reviewed`. +- **Threat-model sizing.** Every pack gets a threat model (stage 10). High-risk packs (e.g. `security`, `legal`) get a fuller model and, optionally, a stricter reviewer rule (per RFC-009 §11 open-decision 5). Sizing is documented, not assumed. + +--- + +## 7. Determinism and reproducibility + +- **Determinism.** The same inputs MUST produce the same output hash on two runs. Pack-level hashing is shipped today via [`scripts/verify_xklickd_skill_packs.py`](../../../scripts/verify_xklickd_skill_packs.py) (SHA-256 over `.klickd` bytes vs the manifest). +- **Reproducible build.** The same build request (`input_hash`) should produce the same candidate (`output_hash`). Each candidate records an `input_hash → output_hash` pair in the audit index (§10). +- **Honest bound.** The build-level reproducibility *record* is `planned` (stage 15) — the hash *primitive* exists and is shipped; the end-to-end build runner that emits the record does not yet exist. Do not claim full reproducible builds today. + +--- + +## 8. Anti-mirage protocol + +This is the load-bearing integrity rule of the supply chain. + +When the supply chain is being tested or run, **no agent may silently compensate for the chain's work.** The agent's permitted actions are exactly: + +1. **Configure** the build request. +2. **Launch** the chain. +3. **Audit** the output. +4. 
**Document** the gap between what the chain produced and the reference. +5. **Premium-pass** the candidate — **only after the audit, and only if explicitly requested** (§2). + +A run that produces little, produces a thin pack, or fails is a **valid result** and must be reported as such. A failing or thin run must never be quietly hand-finished and reported as a success of the chain. The point of the chain is to reveal how much it actually automates; masking that defeats the measurement and repeats the trust incident the brief was written to prevent. + +--- + +## 9. Benchmark intent + +Validation compares three things: + +1. A reference pack built manually by an agent. +2. A candidate generated by the supply chain (the design target is 70–80% of the work, *not* a guarantee). +3. The candidate after a gated premium pass (§2). + +Measured against: foundation/transversal coverage, graph coherence, governance/security accuracy, evidence binding, absence of internal-name/confidential leakage, determinism, reproducibility, editorial quality, and ability to pass the hell tests. + +> **Benchmarks must hunt for faults, not confirm success.** A benchmark designed to pass is not evidence. See [`benchmarks/v4.1/`](../../../benchmarks/v4.1/) — the CI workflows there are explicitly no-publish / no-tag. + +--- + +## 10. Audit-trail index + +An **append-only** index records every build, approval, rejection, promotion, rollback, deprecation, and revocation, keyed by candidate id and approval id, each carrying its `input_hash`, `output_hash`, state transition, timestamp, and reason. The index is consultable and is the single source of truth for lineage (§4) and revocation (§6). + +> The audit index as a shipped, queryable artefact is `planned`. The hash primitives it would record exist today (§7); the index writer does not. + +### 10.1 Internal serial fingerprint + +A subtle, private serial fingerprint supports internal traceability / anti-cloning. 
It is **security-internal only**: documented solely in internal security material, never exposed on any public surface, and intentionally not described here. Its existence is noted so reviewers know the boundary; its construction is out of scope for this public document. + +--- + +## 11. Pointers + +- Artefact contract: [`RFC-009-chimera-v4.1.md`](../RFC-009-chimera-v4.1.md) (§5 architecture, §8/§8.1 validation). +- Truth boundary (load + hash-verify): [`skill-loader-protocol.md`](../../integrations/skill-loader-protocol.md). +- Operator quickstart (human or agent): [`packs/QUICKSTART.md`](./packs/QUICKSTART.md). +- Pack scope + validation summary: [`README.md`](./README.md). +- Concrete pack scaffolds: [`packs/README.md`](./packs/README.md). +- Shipped tooling: [`scripts/verify_xklickd_skill_packs.py`](../../../scripts/verify_xklickd_skill_packs.py), [`scripts/validate_v4_1_candidate_mapping.py`](../../../scripts/validate_v4_1_candidate_mapping.py). +- Benchmark harness (no-publish / no-tag): [`benchmarks/v4.1/`](../../../benchmarks/v4.1/). diff --git a/docs/rfcs/chimera/packs/QUICKSTART.md b/docs/rfcs/chimera/packs/QUICKSTART.md new file mode 100644 index 0000000..554899c --- /dev/null +++ b/docs/rfcs/chimera/packs/QUICKSTART.md @@ -0,0 +1,91 @@ +# x.klickd supply-chain quickstart — build & audit a candidate pack + +> **Status:** Draft · **NON-NORMATIVE** · operator guide for [`../SUPPLY_CHAIN.md`](../SUPPLY_CHAIN.md). +> **Triggers no release.** Nothing here publishes, deploys, tags, or assigns a DOI. +> **Internal naming.** Use the public name **`x.klickd`** in everything you produce. Do not write the internal codename into any pack field or public surface. + +This is the operator path — usable by a **human or an agent** — for running the supply chain on one candidate `carrier_pack`. It is a build-and-audit loop, not a publish flow. 
For the full process and the per-stage `tool` / `manual` / `planned` honesty labels, read [`../SUPPLY_CHAIN.md`](../SUPPLY_CHAIN.md) first. + +--- + +## Truth boundary (read this before you say "it works") + +A pack is **loaded / used** only when both are true: + +```text +artifact_loaded == true AND sha256_matches_manifest == true +``` + +A catalog row, a stub, a routing placeholder, or a doc page is **not** a loaded skill. If you cannot show both flags true, do not claim the pack is in use. See [`../../../integrations/skill-loader-protocol.md`](../../../integrations/skill-loader-protocol.md). + +--- + +## The 5-step loop + +1. **Configure the build request** (config only — no PII, no secrets). Pick the pack id `x.klickd/<pack-name>`, framework backbone, track (P0/P1), tier (Lite/Pro). This fixes the `input_hash`. +2. **Launch the chain.** Run the available pipeline stages (§3 of the spec). Stages marked `planned` are not runnable yet — note them, don't fake them. +3. **Audit the output.** Verify artefacts and hashes with the shipped tooling (below). Check graph coverage, gates, evidence binding, and that no internal name leaked. +4. **Document the gap.** Write down what the chain produced vs the reference, honestly. A thin or failing run is a valid result (anti-mirage rule, §8 of the spec). +5. **Premium pass — only if requested, only after the audit.** Lift the final layer to reference quality. Never run this before the audit; never use it to mask a failed build. + +--- + +## Multi-agent role split + +If you run this with more than one agent, keep the roles separate so the audit stays honest: + +| Role | Does | Must NOT | +|---|---|---| +| **Builder** | Configures the build request, launches the chain. | Hand-finish output and call it the chain's work. | +| **Auditor** | Verifies artefacts + hashes, checks graph/gates/evidence/leaks, documents the gap. | Edit the candidate to make it pass. | +| **Premium** | Last-layer pass, only after audit + only if requested. 
| Re-run earlier stages silently; touch anything before the audit exists. | + +The separation is the safeguard: the builder cannot grade itself, and the premium pass cannot pre-empt the measurement. + +--- + +## Shipped verification commands (no install needed) + +```bash +# 1. Verify all 42 published artifacts parse + hash-match the manifest. +python scripts/verify_xklickd_skill_packs.py verify +# -> expect: 42 verified (8 Lite, 34 Pro), all SHA-256 match + +# 2. List them. +python scripts/verify_xklickd_skill_packs.py list + +# 3. Load + hash-verify one (the artifact_loaded + sha256 contract). +python scripts/verify_xklickd_skill_packs.py load work-assistant + +# 4. Validate the candidate↔framework mapping. +python scripts/validate_v4_1_candidate_mapping.py +``` + +These are dependency-free and read the public artifacts directly. They are the concrete backing for stage 3 (mapping) and stage 15 (pack-level determinism) of the pipeline. + +--- + +## Pre-done checklist + +Before you call a candidate done, confirm: + +- [ ] Build request was config-only (no PII / secrets). +- [ ] Verifier passes (`verify` exits 0; hashes match). +- [ ] Candidate mapping validates. +- [ ] Context-graph nodes/edges present (spec §3.1). +- [ ] Gate defaults + human-authority posture declared; no gate lowered. +- [ ] Evidence grounding rule on every claim. +- [ ] No internal codename / confidential structure in any public-facing field. +- [ ] Gap documented honestly; `planned` stages not presented as automated. +- [ ] Premium pass (if any) ran **after** the audit and was explicitly requested. +- [ ] No publish / tag / DOI / catalog action taken. 
+ +--- + +## Pointers + +- Full process spec: [`../SUPPLY_CHAIN.md`](../SUPPLY_CHAIN.md) +- Artefact contract: [`../../RFC-009-chimera-v4.1.md`](../../RFC-009-chimera-v4.1.md) (§8 validation) +- Truth boundary: [`../../../integrations/skill-loader-protocol.md`](../../../integrations/skill-loader-protocol.md) +- Pack index: [`./README.md`](./README.md) +- Benchmark harness: [`../../../../benchmarks/v4.1/`](../../../../benchmarks/v4.1/) diff --git a/docs/rfcs/chimera/packs/README.md b/docs/rfcs/chimera/packs/README.md index bbfca37..06a3bde 100644 --- a/docs/rfcs/chimera/packs/README.md +++ b/docs/rfcs/chimera/packs/README.md @@ -107,6 +107,8 @@ Anchor ≠ pack. A pack is built **from authoritative frameworks**, not from a r ## 7. See also +- [`../SUPPLY_CHAIN.md`](../SUPPLY_CHAIN.md) — supply-chain build-process spec (NON-NORMATIVE): 18-stage pipeline, lineage, rollback/deprecation, determinism, anti-mirage protocol, audit index. +- [`./QUICKSTART.md`](./QUICKSTART.md) — operator quickstart (human or agent) for the build-and-audit loop + shipped verification commands. - [`../../RFC-009-chimera-v4.1.md`](../../RFC-009-chimera-v4.1.md) — Chimera RFC (carrier-vs-skill in §5.1.1, validation in §8, no-catalog in §7). - [`../README.md`](../README.md) — Chimera companion summary. - [`./student.md`](./student.md) — first concrete pack scaffold. From a51a67109a35c9e236d18fc2c95c8d1cae63357f Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 11:21:57 +0000 Subject: [PATCH 2/8] feat(supply-chain): add tool-backed audit-trail index + determinism record First real automation stage of the x.klickd supply-chain protocol. 
Adds a stdlib-only, offline generator that collects the 42 verifiable v4.1 candidate skill packs (+ manifest), enforces the loaded+sha256_matches_manifest gate, and writes two re-checkable artefacts: - .internal-skills/supply-chain/audit/audit_trail_index.json - .internal-skills/supply-chain/audit/determinism_record.json deterministic_run_id is derived only from inputs (timestamps quarantined in a non_deterministic_zone, excluded from every hash), so identical inputs yield an identical id across runs and hosts. A `check` subcommand verifies on-disk artefacts are in sync and exits non-zero on drift or on banned-claim/secret content. validation_results is left empty by design: the generator records but does not run the validation commands, so it asserts no outcomes it did not observe (anti-mirage). Only stages labelled `tool` are automated; everything else stays `planned` / `partial` / `manual` per the stage_automation map. Not a v4.1 GA release. No publish/deploy/merge/tag. Co-Authored-By: Claude Opus 4.7 --- .internal-skills/supply-chain/audit/README.md | 70 +++ .../supply-chain/audit/audit_trail_index.json | 52 ++ .../audit/determinism_record.json | 252 +++++++++ scripts/generate_supply_chain_audit.py | 522 ++++++++++++++++++ tests/test_supply_chain_audit.py | 158 ++++++ 5 files changed, 1054 insertions(+) create mode 100644 .internal-skills/supply-chain/audit/README.md create mode 100644 .internal-skills/supply-chain/audit/audit_trail_index.json create mode 100644 .internal-skills/supply-chain/audit/determinism_record.json create mode 100644 scripts/generate_supply_chain_audit.py create mode 100644 tests/test_supply_chain_audit.py diff --git a/.internal-skills/supply-chain/audit/README.md b/.internal-skills/supply-chain/audit/README.md new file mode 100644 index 0000000..20f6a5a --- /dev/null +++ b/.internal-skills/supply-chain/audit/README.md @@ -0,0 +1,70 @@ +# x.klickd supply-chain — audit-trail index + determinism record + +**Status:** NON-NORMATIVE. 
Not a v4.1 GA release artefact. No publish / deploy / +merge / tag / release performed by this stage. + +This directory holds the **first tool-backed automation stage** of the x.klickd +supply-chain protocol. It does **not** automate the full pipeline. It turns two +traceability elements from spec into artefacts that are actually generated, +hashed, and re-checkable by a script: + +| File | What it is | +|---|---| +| `audit_trail_index.json` | A consultable index of the verifiable artifacts the supply chain operates on, the declared validation commands, an append-style event list, and a per-stage automation map. | +| `determinism_record.json` | Input file hashes, output file hashes, and a `deterministic_run_id` derived **only** from inputs, so identical inputs yield an identical id across runs and hosts. | + +## Generate / re-check + +```bash +# Write (or refresh) both artefacts: +python scripts/generate_supply_chain_audit.py + +# Verify the on-disk artefacts are still in sync with current inputs (no write): +python scripts/generate_supply_chain_audit.py check +``` + +`generate` exits non-zero if a critical invariant fails (missing or changed +input, hash mismatch against the manifest, banned public-claim string, or an +obvious secret/PII pattern in the generated output). `check` exits non-zero on +any drift in the deterministic core. + +## Determinism + +- The inputs are the 42 NON-NORMATIVE x.klickd v4.1 candidate skill packs plus + their manifest under `examples/v4.1/x-klickd-skills/` (43 inputs total). +- An input is counted **only** when its bytes exist on disk **and** hash-match + the manifest — the same `artifact_loaded` + `sha256_matches_manifest` gate + enforced by `scripts/verify_xklickd_skill_packs.py`. A catalogue entry alone + is not a loaded skill. +- `deterministic_run_id` and `checked_artifacts_hash_summary` are computed over + `(relative_path, sha256)` pairs only. They do **not** depend on timestamps, + host, or run order. 
+- The only non-deterministic field, `generated_at`, is quarantined under + `non_deterministic_zone` and is **excluded** from every hash. + +## What is real vs. planned + +`stage_automation` in `audit_trail_index.json` labels each pipeline stage: + +- `tool` — backed by shipped, runnable automation (audit-trail index, + determinism record, reproducibility check, pack hash verification, candidate + mapping validation). +- `partial` — a tripwire, not a full implementation (the PII/secrets scan here + guards only this stage's own generated output). +- `planned` — spec-only; no automation yet (diff report, threat model, license + check, source-freshness check, private/public boundary check, context-graph + generation, candidate-skill generation). +- `manual` — human/agent premium pass. + +`validation_results` is intentionally **empty**: this generator records the +declared validation commands but does not run them, so it does not assert their +outcomes. Pre-filled "pass" values would be a mirage. The operator runs the +commands; the audit / CI captures the outcomes. + +## Relation to the supply-chain spec + +The full 18-stage build-process specification is documented separately in the +supply-chain RFC under `docs/rfcs/` (the docs-only spec PR; not merged here). +This stage is the narrow, executable slice of stage **15 (determinism / +reproducibility)** and the **audit-trail index** from that spec. Everything else +in the pipeline remains `planned` until separately implemented. 
diff --git a/.internal-skills/supply-chain/audit/audit_trail_index.json b/.internal-skills/supply-chain/audit/audit_trail_index.json new file mode 100644 index 0000000..304b870 --- /dev/null +++ b/.internal-skills/supply-chain/audit/audit_trail_index.json @@ -0,0 +1,52 @@ +{ + "build_or_audit_events": [ + { + "automation": "tool", + "event": "audit_trail_index_generated", + "inputs_hash_summary": "10fa77ec74ebfa2b7daa51a5787607b1dc9eb654608477f478c7850ab5a09b85", + "source_commit_sha": "b73858cb2d9c6915195361e9ed34ed1b02a39ea4", + "stage": "audit_trail_index" + } + ], + "checked_artifacts_count": 43, + "checked_artifacts_hash_summary": "10fa77ec74ebfa2b7daa51a5787607b1dc9eb654608477f478c7850ab5a09b85", + "deterministic_run_id": "sha256:10fa77ec74ebfa2b7daa51a5787607b1dc9eb654608477f478c7850ab5a09b85", + "kind": "x_klickd_supply_chain_audit_trail_index", + "non_deterministic_zone": { + "comment": "Fields here are excluded from deterministic_run_id and checked_artifacts_hash_summary.", + "generated_at": "2026-06-02T11:20:28Z" + }, + "non_normative": true, + "notes": [ + "NON-NORMATIVE. Not a v4.1 GA release artefact.", + "Only the stages marked 'tool' are backed by shipped automation; 'planned' stages are spec-only; 'partial' is a tripwire, not a full scanner; 'manual' is human/agent premium work.", + "An artifact is counted only when its bytes exist on disk and hash-match the manifest (loaded + sha256_matches_manifest).", + "validation_results is empty by design: this generator does not run the validation commands, so it does not assert their outcomes.", + "Timestamps are excluded from deterministic_run_id; see determinism_record.json non_deterministic_zone." 
+ ], + "repo": "Davincc77/klickdskill", + "schema_version": "0.1.0", + "source_commit_sha": "b73858cb2d9c6915195361e9ed34ed1b02a39ea4", + "stage_automation": { + "audit_trail_index": "tool", + "candidate_mapping_validation": "tool", + "candidate_skill_generation": "planned", + "context_graph_generation": "planned", + "determinism_record": "tool", + "diff_report": "planned", + "license_check": "planned", + "pack_hash_verification": "tool", + "pii_secrets_scan": "partial", + "premium_pass": "manual", + "private_public_boundary_check": "planned", + "reproducibility_check": "tool", + "source_freshness_check": "planned", + "threat_model": "planned" + }, + "validation_commands": [ + "python scripts/verify_xklickd_skill_packs.py verify", + "python scripts/validate_v4_1_candidate_mapping.py", + "pytest tests/test_supply_chain_audit.py" + ], + "validation_results": [] +} diff --git a/.internal-skills/supply-chain/audit/determinism_record.json b/.internal-skills/supply-chain/audit/determinism_record.json new file mode 100644 index 0000000..52b3bb6 --- /dev/null +++ b/.internal-skills/supply-chain/audit/determinism_record.json @@ -0,0 +1,252 @@ +{ + "deterministic_run_id": "sha256:10fa77ec74ebfa2b7daa51a5787607b1dc9eb654608477f478c7850ab5a09b85", + "hash_algo": "sha256", + "input_files": [ + { + "bytes": 9308, + "relative_path": "examples/v4.1/x-klickd-skills/lite/artist.klickd", + "sha256": "56dbd966942475354e4f2daac48c3aeabfedbf3ff48f6248c443cdf8fe82b5c8" + }, + { + "bytes": 9353, + "relative_path": "examples/v4.1/x-klickd-skills/lite/consumer-rights.klickd", + "sha256": "16f77c9b7cb0801f198407f063c1b8395b98f6f729a14597ede8a15ae766ebbc" + }, + { + "bytes": 9325, + "relative_path": "examples/v4.1/x-klickd-skills/lite/game-literacy.klickd", + "sha256": "6e3158a2bde1024f54db53d149e525b820c4c167a71124416c8f4e8df38a3632" + }, + { + "bytes": 9719, + "relative_path": "examples/v4.1/x-klickd-skills/lite/media-planner.klickd", + "sha256": 
"a399ef56eb140d5adf272ffe3578448085f082154647811853e3d52e58b3a33e" + }, + { + "bytes": 10149, + "relative_path": "examples/v4.1/x-klickd-skills/lite/parent-gaming.klickd", + "sha256": "f2016892ac731f284d37d207a9464dcb93551b6934ff86f09d05fcd9d1a0ce52" + }, + { + "bytes": 9745, + "relative_path": "examples/v4.1/x-klickd-skills/lite/social-literacy.klickd", + "sha256": "5da46abae45a40adf562d1119bd4240c60d14b7c2b798e8e47aed4122834d9dc" + }, + { + "bytes": 9332, + "relative_path": "examples/v4.1/x-klickd-skills/lite/streaming-creator.klickd", + "sha256": "83a47a7370650c9870d28566344a11e52298a9e0a581003dc0ea8009558ac8ce" + }, + { + "bytes": 10312, + "relative_path": "examples/v4.1/x-klickd-skills/lite/work-assistant.klickd", + "sha256": "7918922d04e406c406f1d8a1a6aeaa3f82fb2a9c5697a547f001cfac85063bcb" + }, + { + "bytes": 15656, + "relative_path": "examples/v4.1/x-klickd-skills/manifest.json", + "sha256": "4f54e35ae1469dc0519f7ff97de0b69395f360e167220bc204525cfcb9c7c55f" + }, + { + "bytes": 11371, + "relative_path": "examples/v4.1/x-klickd-skills/pro/accounting-operator.klickd", + "sha256": "0464b9d8bd284e7c498a60064f8f923e148f2ac3e0e39fd7c0f479174c8ad243" + }, + { + "bytes": 11707, + "relative_path": "examples/v4.1/x-klickd-skills/pro/api-integrator.klickd", + "sha256": "875536c647ff1905f2e54a7997977b7d4087b7153b8b9a7c3270aef301af6dab" + }, + { + "bytes": 10615, + "relative_path": "examples/v4.1/x-klickd-skills/pro/contract-review.klickd", + "sha256": "4be316d9bbb758e9d98dcf81ac25ef1fc9a8e24d9cb6462055d66660f104ba41" + }, + { + "bytes": 12349, + "relative_path": "examples/v4.1/x-klickd-skills/pro/customer-support-operator.klickd", + "sha256": "9e6c755f8d975110bd8fd5bfdb1d87773f1a571968bf27933357b7bcc13516e9" + }, + { + "bytes": 12175, + "relative_path": "examples/v4.1/x-klickd-skills/pro/data-analyst.klickd", + "sha256": "8ae8fe5e3b55d4402e0da93738e97d23c41d3daeb93c6bdc41301486a2f6cd4f" + }, + { + "bytes": 12201, + "relative_path": 
"examples/v4.1/x-klickd-skills/pro/devops-operator.klickd", + "sha256": "bfad80b1110461f683c7000b9af1104ccdcd33a10c3059e3898dabea3f1d6fb4" + }, + { + "bytes": 10450, + "relative_path": "examples/v4.1/x-klickd-skills/pro/drone-operator.klickd", + "sha256": "d4017d93c8b8ac765e59914b61116bd90bd68944c4a2b47e031dafc7d562ec23" + }, + { + "bytes": 12705, + "relative_path": "examples/v4.1/x-klickd-skills/pro/edge-ai-operator.klickd", + "sha256": "295cd879168320e5af7d19a0c82a1bd0f8be52fbf9f16ec80c9db7f33b3cef6e" + }, + { + "bytes": 10507, + "relative_path": "examples/v4.1/x-klickd-skills/pro/eu-ai-act.klickd", + "sha256": "4df66de2930e3f4d726c08a1e0d15e64c6929f4cbbd6a593d69b9f88c5a9de93" + }, + { + "bytes": 11765, + "relative_path": "examples/v4.1/x-klickd-skills/pro/evidence-desk.klickd", + "sha256": "a1cc35277960a95ebbe12ca12c00d9d70ce71dc58cae99b642653835b90d14dd" + }, + { + "bytes": 11950, + "relative_path": "examples/v4.1/x-klickd-skills/pro/finance-analyst.klickd", + "sha256": "dbf02eac4910f73b35f818bd20545cd41f6a391ca06adf59f9470aa421afb326" + }, + { + "bytes": 12342, + "relative_path": "examples/v4.1/x-klickd-skills/pro/game-design.klickd", + "sha256": "0894cec1f3f54b770a16fd3f07e16817ee57e574bdb2ca0837b8653362f3abb3" + }, + { + "bytes": 10671, + "relative_path": "examples/v4.1/x-klickd-skills/pro/gdpr-readiness.klickd", + "sha256": "d5c4cb7b707fc3f028424f34149cb95300fae583f9a261dfc161914da43bbc40" + }, + { + "bytes": 12293, + "relative_path": "examples/v4.1/x-klickd-skills/pro/healthcare-ai-safety-reviewer.klickd", + "sha256": "183965179ebcdb7ef0f67866aa9c7364aaad1bd02996b4d0c696580ee2c6f789" + }, + { + "bytes": 12310, + "relative_path": "examples/v4.1/x-klickd-skills/pro/identity-access-management.klickd", + "sha256": "d9112c7f82733e77bc382c4ca314c2bf3112c63ca03455a7efd037b6c33cd74d" + }, + { + "bytes": 12206, + "relative_path": "examples/v4.1/x-klickd-skills/pro/learning-designer.klickd", + "sha256": 
"f7b1f5d39ee4f05261b4735a32475dee2583556681124c59b0250335729188f9" + }, + { + "bytes": 10906, + "relative_path": "examples/v4.1/x-klickd-skills/pro/literature-review.klickd", + "sha256": "104865d2750605d9261fd27208b0e19e2908861e9e5672ed453d383701a770f1" + }, + { + "bytes": 12164, + "relative_path": "examples/v4.1/x-klickd-skills/pro/llm-agent-engineering.klickd", + "sha256": "1649234c2859b8309c404f71f5c0db714805655cdbb714b7bdb9394644fb49f8" + }, + { + "bytes": 13319, + "relative_path": "examples/v4.1/x-klickd-skills/pro/llm-agent-security.klickd", + "sha256": "baed596a642f68086cabc1a76d09ba6d987231a057c8fb66f1aabb84dd137098" + }, + { + "bytes": 11861, + "relative_path": "examples/v4.1/x-klickd-skills/pro/mission-control.klickd", + "sha256": "2c2d02b4ecec40f022c4c7da0cdb3f082e588dfe11d3f14d1692dbbdba7e8ba2" + }, + { + "bytes": 11275, + "relative_path": "examples/v4.1/x-klickd-skills/pro/policy-analyst.klickd", + "sha256": "f348f1d1a73797979a51005586dcd9f1069a17fab173dab7df742290127c73a7" + }, + { + "bytes": 10728, + "relative_path": "examples/v4.1/x-klickd-skills/pro/privacy-product.klickd", + "sha256": "5a35d4a214a98f945f048c6647528df5ce2188aaa3d0913b7c6a1af2305eff00" + }, + { + "bytes": 12231, + "relative_path": "examples/v4.1/x-klickd-skills/pro/product-manager.klickd", + "sha256": "dbf2365508df3ae9acbbb6e002e3c44fb2a81e9166d0003cacb94a3714fdaeda" + }, + { + "bytes": 11164, + "relative_path": "examples/v4.1/x-klickd-skills/pro/project-operator.klickd", + "sha256": "5d019adfc08bcf518780bbe3e18a428682374397475a8bf6282d1e474ae6d5ce" + }, + { + "bytes": 12865, + "relative_path": "examples/v4.1/x-klickd-skills/pro/release-engineer.klickd", + "sha256": "c96329cff482f39d52a13b628bd027cd5854f15d40ce58c945fb58aec52aef9e" + }, + { + "bytes": 10347, + "relative_path": "examples/v4.1/x-klickd-skills/pro/rights-guard.klickd", + "sha256": "3efa2982479b6ba3544092d848fb52b91dc7687031c4a481181547d9728703b3" + }, + { + "bytes": 12186, + "relative_path": 
"examples/v4.1/x-klickd-skills/pro/sales-operator.klickd", + "sha256": "1bdc2c5895e07b909dda9bd04040917004727bed00198962c006a43a0be13060" + }, + { + "bytes": 11153, + "relative_path": "examples/v4.1/x-klickd-skills/pro/second-brain.klickd", + "sha256": "06504aa5dddc1b53634310210fff4374e68e0a2b959ae9dfa191b29995153bdb" + }, + { + "bytes": 12682, + "relative_path": "examples/v4.1/x-klickd-skills/pro/security-incident-response.klickd", + "sha256": "1315833cf73b3d04e00fe73006e985b0a7af7d266c457e18f61204cca09dcc7a" + }, + { + "bytes": 12667, + "relative_path": "examples/v4.1/x-klickd-skills/pro/sustainability-analyst.klickd", + "sha256": "445a7f1111e6474ca87bc717d0f50d6af5016863fe3fd87083502d77eeffcf1f" + }, + { + "bytes": 12150, + "relative_path": "examples/v4.1/x-klickd-skills/pro/technical-writer.klickd", + "sha256": "473c44c67a889c783872494f262feb16bdb1377a74dd5b4d605fe340ecebecc9" + }, + { + "bytes": 11317, + "relative_path": "examples/v4.1/x-klickd-skills/pro/trust-evidence.klickd", + "sha256": "57f4ac349438d3c3305e03ecf9a09a44d20e289550f8afa292267666bcc150ad" + }, + { + "bytes": 12109, + "relative_path": "examples/v4.1/x-klickd-skills/pro/ux-researcher.klickd", + "sha256": "d91afbf120c6175c4d23d175265697b7a0a7bd4f11dd60fc08bb053c8113e13e" + }, + { + "bytes": 16908, + "relative_path": "examples/v4.1/x-klickd-skills/pro/video-production-pipeline.klickd", + "sha256": "d6da8f7569953f7c6fb4f0ce02cb2649187bcbf52ae3f4d32a35c8fe885498a8" + } + ], + "inputs_hash_summary": "10fa77ec74ebfa2b7daa51a5787607b1dc9eb654608477f478c7850ab5a09b85", + "kind": "x_klickd_supply_chain_determinism_record", + "non_deterministic_zone": { + "comment": "Excluded from deterministic_run_id and from the output determinism hashes.", + "generated_at": "2026-06-02T11:20:28Z" + }, + "non_normative": true, + "output_files": [ + { + "deterministic_core_sha256": "8443e75756ffbbf1e138efec221cf117c70d76b15ea7d198822c5a46975a1ab6", + "relative_path": 
".internal-skills/supply-chain/audit/audit_trail_index.json" + }, + { + "deterministic_core_sha256": "37a5891642a59ff26078e70bccea5e387eb8465ff0ca212b564ea63e728e3bee", + "relative_path": ".internal-skills/supply-chain/audit/determinism_record.json" + } + ], + "repeatability": { + "deterministic_fields": [ + "deterministic_run_id", + "inputs_hash_summary", + "input_files[*].sha256" + ], + "instructions": "Re-run `python scripts/generate_supply_chain_audit.py`. If inputs are unchanged, deterministic_run_id and inputs_hash_summary are identical across runs and hosts.", + "non_deterministic_fields_excluded": [ + "non_deterministic_zone.generated_at", + "source_commit_sha (provenance, not part of the hash)" + ] + }, + "repo": "Davincc77/klickdskill", + "schema_version": "0.1.0" +} diff --git a/scripts/generate_supply_chain_audit.py b/scripts/generate_supply_chain_audit.py new file mode 100644 index 0000000..e3f92ed --- /dev/null +++ b/scripts/generate_supply_chain_audit.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +"""Generate the x.klickd supply-chain audit-trail index + determinism record. + +This is the FIRST real (tool-backed) automation stage of the supply-chain +protocol described in the supply-chain RFC under docs/rfcs/ (docs-only spec PR, +not merged) and summarised in .internal-skills/supply-chain/audit/README.md. It +does NOT automate the full pipeline. It turns two traceability elements from +spec into artefacts that are actually produced, hashed, and re-checkable: + + 1. audit_trail_index.json -- a consultable index of the verifiable + artifacts the supply chain operates on, the + validation commands run against them, and an + append-style event list. + 2. determinism_record.json -- input file hashes, output file hashes, and a + deterministic_run_id derived only from inputs, + so two runs over identical inputs produce an + identical id (timestamps are quarantined in a + documented non-deterministic zone and are NOT + part of the hash). 
+ +Inputs are the 42 NON-NORMATIVE x.klickd v4.1 candidate skill packs and their +manifest under examples/v4.1/x-klickd-skills/. A pack is only treated as a real +artifact here because its bytes exist on disk and hash-match the manifest -- +the same loaded + sha256_matches_manifest gate enforced by +scripts/verify_xklickd_skill_packs.py. A catalogue entry alone is NOT a loaded +skill. + +Stdlib-only. Offline. No network, no provider calls, no paid resources. No +release, tag, merge, publish, or deploy. Does not touch the private repo. + +CLI: + + python scripts/generate_supply_chain_audit.py # write artefacts + python scripts/generate_supply_chain_audit.py generate # (explicit) write + python scripts/generate_supply_chain_audit.py check # verify on-disk + # artefacts are + # in sync; no write + +Exit codes: + 0 success (write succeeded, or check found no drift) + 1 a critical invariant failed (missing/changed input, hash mismatch, + banned claim, obvious secret/PII), or `check` found drift + 2 usage / I-O error +""" + +from __future__ import annotations + +import datetime as _dt +import hashlib +import json +import re +import sys +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[1] +PACK_DIR = REPO_ROOT / "examples" / "v4.1" / "x-klickd-skills" +MANIFEST_PATH = PACK_DIR / "manifest.json" + +AUDIT_DIR = REPO_ROOT / ".internal-skills" / "supply-chain" / "audit" +AUDIT_INDEX_PATH = AUDIT_DIR / "audit_trail_index.json" +DETERMINISM_PATH = AUDIT_DIR / "determinism_record.json" + +SCHEMA_VERSION = "0.1.0" +REPO_NAME = "Davincc77/klickdskill" + +# Validation commands this stage records as the supply-chain's current +# tool-backed checks. They are recorded as declared commands; this generator +# does not silently run them (anti-mirage: the operator runs and audits them). 
+VALIDATION_COMMANDS = [ + "python scripts/verify_xklickd_skill_packs.py verify", + "python scripts/validate_v4_1_candidate_mapping.py", + "pytest tests/test_supply_chain_audit.py", +] + +# Substrings that must never appear in the generated public-facing artefacts. +# Two classes: internal codename leak, and banned unbounded public claims. +BANNED_SUBSTRINGS = ( + "chimera", + "universal standard", + "automatic gdpr", + "automatic eu ai act", + "benchmark superiority", + "proven benchmark", +) + +# Coarse secret / PII signatures. This is a tripwire on our OWN generated +# output, not a general scanner -- the inputs are public artifacts, but we +# refuse to emit anything that looks like a credential or personal contact. +_SECRET_PATTERNS = ( + re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----"), + re.compile(r"\bAKIA[0-9A-Z]{16}\b"), + re.compile(r"\bsk-[A-Za-z0-9]{20,}\b"), + re.compile(r"\bghp_[A-Za-z0-9]{36}\b"), + re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"), +) + + +class InvariantError(RuntimeError): + """Raised when a critical supply-chain invariant fails.""" + + +def _rel(path: Path) -> str: + """Repo-relative path when possible, else the bare name. + + The bare-name fallback keeps the output stable and the record self-describing + when artefacts are written outside the repo (e.g. a temp dir under test). 
+ """ + try: + return str(path.relative_to(REPO_ROOT)) + except ValueError: + return path.name + + +def _sha256_bytes(data: bytes) -> str: + return hashlib.sha256(data).hexdigest() + + +def _sha256_file(path: Path) -> str: + return _sha256_bytes(path.read_bytes()) + + +def _load_manifest() -> dict[str, Any]: + if not MANIFEST_PATH.exists(): + raise InvariantError(f"manifest not found at {MANIFEST_PATH}") + return json.loads(MANIFEST_PATH.read_text(encoding="utf-8")) + + +def _pack_path(entry: dict[str, Any]) -> Path: + rel = entry.get("relative_path") + if rel: + return REPO_ROOT / rel + return PACK_DIR / entry["tier"] / entry["file"] + + +def _git_commit_sha() -> str | None: + """Best-effort source commit, read from .git without invoking git. + + Returns None when not in a usable git checkout (the artefact then records + null rather than a guessed value -- never fabricate provenance). + """ + head = REPO_ROOT / ".git" / "HEAD" + if not head.exists(): + return None + try: + ref = head.read_text(encoding="utf-8").strip() + except OSError: + return None + if ref.startswith("ref:"): + ref_path = REPO_ROOT / ".git" / ref.split(" ", 1)[1].strip() + if ref_path.exists(): + return ref_path.read_text(encoding="utf-8").strip() or None + # packed-refs fallback + packed = REPO_ROOT / ".git" / "packed-refs" + target = ref.split(" ", 1)[1].strip() + if packed.exists(): + for line in packed.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith(("#", "^")): + continue + sha, _, name = line.partition(" ") + if name == target: + return sha or None + return None + return ref or None + + +def _collect_inputs() -> list[dict[str, Any]]: + """Collect the verifiable supply-chain inputs with deterministic ordering. + + Each input must (a) exist on disk and (b) hash-match the manifest, mirroring + the loaded + sha256_matches_manifest gate. A mismatch is a critical + invariant failure -- we do NOT silently paper over it. 
+ """ + manifest = _load_manifest() + packs = manifest.get("packs", []) + if manifest.get("total_count") != 42 or len(packs) != 42: + raise InvariantError( + f"manifest must report 42 packs, got " + f"total_count={manifest.get('total_count')} entries={len(packs)}" + ) + + inputs: list[dict[str, Any]] = [] + # The manifest itself is an input. + inputs.append( + { + "role": "manifest", + "relative_path": str(MANIFEST_PATH.relative_to(REPO_ROOT)), + "bytes": MANIFEST_PATH.stat().st_size, + "sha256": _sha256_file(MANIFEST_PATH), + } + ) + + for entry in packs: + path = _pack_path(entry) + label = entry.get("file", "") + if not path.exists(): + raise InvariantError(f"{label}: missing input file at {path}") + data = path.read_bytes() + sha = _sha256_bytes(data) + expected = entry.get("sha256_file") + if sha != expected: + raise InvariantError( + f"{label}: sha256 {sha} != manifest {expected} " + "(artifact not in a loaded+verified state)" + ) + if len(data) != entry.get("bytes"): + raise InvariantError( + f"{label}: byte length {len(data)} != manifest {entry.get('bytes')}" + ) + inputs.append( + { + "role": "pack", + "pack": entry.get("pack"), + "tier": entry.get("tier"), + "relative_path": entry.get("relative_path"), + "bytes": len(data), + "sha256": sha, + } + ) + + # Stable ordering by relative_path so the derived id is order-independent + # w.r.t. manifest layout changes that do not change content. + inputs.sort(key=lambda x: x["relative_path"]) + return inputs + + +def _hash_summary(inputs: list[dict[str, Any]]) -> str: + """A single deterministic digest over (relative_path, sha256) pairs. + + Depends only on input content + identity -- not on timestamps, host, or run + order -- so it is the reproducibility anchor. 
+ """ + h = hashlib.sha256() + for item in inputs: + h.update(item["relative_path"].encode("utf-8")) + h.update(b"\0") + h.update(item["sha256"].encode("utf-8")) + h.update(b"\n") + return h.hexdigest() + + +def _scan_banned(text: str) -> list[str]: + low = text.lower() + return [s for s in BANNED_SUBSTRINGS if s in low] + + +def _scan_secrets(text: str) -> list[str]: + hits: list[str] = [] + for pat in _SECRET_PATTERNS: + if pat.search(text): + hits.append(pat.pattern) + return hits + + +def build_records() -> tuple[dict[str, Any], dict[str, Any]]: + """Build (audit_index, determinism_record) as plain dicts. + + The deterministic core of both records excludes the timestamp, which lives + only under `non_deterministic_zone`. + """ + inputs = _collect_inputs() + inputs_hash_summary = _hash_summary(inputs) + commit_sha = _git_commit_sha() + now = _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # deterministic_run_id is derived ONLY from inputs -> identical inputs give + # an identical id across runs / hosts / clocks. + deterministic_run_id = "sha256:" + inputs_hash_summary + + audit_index: dict[str, Any] = { + "schema_version": SCHEMA_VERSION, + "kind": "x_klickd_supply_chain_audit_trail_index", + "non_normative": True, + "repo": REPO_NAME, + "source_commit_sha": commit_sha, + "deterministic_run_id": deterministic_run_id, + "checked_artifacts_count": len(inputs), + "checked_artifacts_hash_summary": inputs_hash_summary, + "validation_commands": list(VALIDATION_COMMANDS), + # validation_results is intentionally empty here: this generator records + # the declared commands but does NOT run them, so it cannot honestly + # assert their results. The operator runs them and the audit/CI captures + # outcomes. Pre-filled "pass" values would be a mirage. 
+ "validation_results": [], + "build_or_audit_events": [ + { + "event": "audit_trail_index_generated", + "stage": "audit_trail_index", + "automation": "tool", + "inputs_hash_summary": inputs_hash_summary, + "source_commit_sha": commit_sha, + } + ], + "stage_automation": { + "audit_trail_index": "tool", + "determinism_record": "tool", + "reproducibility_check": "tool", + "pack_hash_verification": "tool", + "candidate_mapping_validation": "tool", + "diff_report": "planned", + "threat_model": "planned", + "license_check": "planned", + "source_freshness_check": "planned", + "pii_secrets_scan": "partial", + "private_public_boundary_check": "planned", + "context_graph_generation": "planned", + "candidate_skill_generation": "planned", + "premium_pass": "manual", + }, + "notes": [ + "NON-NORMATIVE. Not a v4.1 GA release artefact.", + "Only the stages marked 'tool' are backed by shipped automation; " + "'planned' stages are spec-only; 'partial' is a tripwire, not a " + "full scanner; 'manual' is human/agent premium work.", + "An artifact is counted only when its bytes exist on disk and " + "hash-match the manifest (loaded + sha256_matches_manifest).", + "validation_results is empty by design: this generator does not run " + "the validation commands, so it does not assert their outcomes.", + "Timestamps are excluded from deterministic_run_id; see " + "determinism_record.json non_deterministic_zone.", + ], + "non_deterministic_zone": { + "generated_at": now, + "comment": "Fields here are excluded from deterministic_run_id and " + "checked_artifacts_hash_summary.", + }, + } + + determinism_record: dict[str, Any] = { + "schema_version": SCHEMA_VERSION, + "kind": "x_klickd_supply_chain_determinism_record", + "non_normative": True, + "repo": REPO_NAME, + "hash_algo": "sha256", + "deterministic_run_id": deterministic_run_id, + "input_files": [ + {"relative_path": i["relative_path"], "sha256": i["sha256"], "bytes": i["bytes"]} + for i in inputs + ], + "inputs_hash_summary": 
inputs_hash_summary, + # output_files hashes are computed over the deterministic core of each + # output (with non_deterministic_zone stripped), so the record is + # self-consistent across runs. See verify_outputs(). + "output_files": [ + {"relative_path": _rel(AUDIT_INDEX_PATH)}, + {"relative_path": _rel(DETERMINISM_PATH)}, + ], + "repeatability": { + "instructions": "Re-run `python scripts/generate_supply_chain_audit.py`. " + "If inputs are unchanged, deterministic_run_id and " + "inputs_hash_summary are identical across runs and hosts.", + "deterministic_fields": [ + "deterministic_run_id", + "inputs_hash_summary", + "input_files[*].sha256", + ], + "non_deterministic_fields_excluded": [ + "non_deterministic_zone.generated_at", + "source_commit_sha (provenance, not part of the hash)", + ], + }, + "non_deterministic_zone": { + "generated_at": now, + "comment": "Excluded from deterministic_run_id and from the output " + "determinism hashes.", + }, + } + return audit_index, determinism_record + + +def _deterministic_core(record: dict[str, Any]) -> dict[str, Any]: + """Return a copy of `record` with the non-deterministic zone removed. + + Used to hash outputs in a clock-independent way. + """ + core = dict(record) + core.pop("non_deterministic_zone", None) + core.pop("source_commit_sha", None) + if "build_or_audit_events" in core: + core["build_or_audit_events"] = [ + {k: v for k, v in ev.items() if k != "source_commit_sha"} + for ev in core["build_or_audit_events"] + ] + return core + + +def _canonical_json(obj: Any) -> str: + return json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=True) + "\n" + + +def _serialize(record: dict[str, Any]) -> str: + # Stable, human-diffable serialization. sort_keys keeps the on-disk bytes + # deterministic regardless of dict construction order. 
+ return _canonical_json(record) + + +def _guard_output(name: str, text: str) -> None: + banned = _scan_banned(text) + if banned: + raise InvariantError(f"{name}: banned substring(s) present: {banned}") + secrets = _scan_secrets(text) + if secrets: + raise InvariantError(f"{name}: possible secret/PII pattern(s): {secrets}") + + +def cmd_generate() -> int: + audit_index, determinism_record = build_records() + + # Stamp the deterministic-core hashes of each output into the determinism + # record so the record describes the bytes it ships next to. + audit_core_hash = _sha256_bytes( + _canonical_json(_deterministic_core(audit_index)).encode("utf-8") + ) + det_core_hash = _sha256_bytes( + _canonical_json(_deterministic_core(determinism_record)).encode("utf-8") + ) + for out in determinism_record["output_files"]: + if out["relative_path"].endswith("audit_trail_index.json"): + out["deterministic_core_sha256"] = audit_core_hash + else: + out["deterministic_core_sha256"] = det_core_hash + + audit_text = _serialize(audit_index) + det_text = _serialize(determinism_record) + + _guard_output("audit_trail_index.json", audit_text) + _guard_output("determinism_record.json", det_text) + + AUDIT_DIR.mkdir(parents=True, exist_ok=True) + AUDIT_INDEX_PATH.write_text(audit_text, encoding="utf-8") + DETERMINISM_PATH.write_text(det_text, encoding="utf-8") + + print( + f"OK: wrote audit-trail index + determinism record " + f"({audit_index['checked_artifacts_count']} artifacts, " + f"run_id {audit_index['deterministic_run_id']})." + ) + print(f" - {_rel(AUDIT_INDEX_PATH)}") + print(f" - {_rel(DETERMINISM_PATH)}") + return 0 + + +def cmd_check() -> int: + """Verify on-disk artefacts are in sync with current inputs (no write). + + Compares the deterministic core of the freshly-built records against the + deterministic core of the on-disk records. Drift in the time-quarantined + zone is ignored; drift anywhere else (or missing files) is a failure. 
+ """ + if not AUDIT_INDEX_PATH.exists() or not DETERMINISM_PATH.exists(): + print("FAIL: audit artefacts missing; run generate.", file=sys.stderr) + return 1 + + audit_index, determinism_record = build_records() + + disk_audit = json.loads(AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + disk_det = json.loads(DETERMINISM_PATH.read_text(encoding="utf-8")) + + problems: list[str] = [] + if _deterministic_core(audit_index) != _deterministic_core(disk_audit): + problems.append("audit_trail_index.json out of sync with current inputs") + # output_files carry computed hashes; rebuild them before comparing. + audit_core_hash = _sha256_bytes( + _canonical_json(_deterministic_core(audit_index)).encode("utf-8") + ) + det_core_hash = _sha256_bytes( + _canonical_json(_deterministic_core(determinism_record)).encode("utf-8") + ) + for out in determinism_record["output_files"]: + out["deterministic_core_sha256"] = ( + audit_core_hash + if out["relative_path"].endswith("audit_trail_index.json") + else det_core_hash + ) + if _deterministic_core(determinism_record) != _deterministic_core(disk_det): + problems.append("determinism_record.json out of sync with current inputs") + + # Re-guard on-disk bytes for banned/secret content. + for name, text in ( + ("audit_trail_index.json", AUDIT_INDEX_PATH.read_text(encoding="utf-8")), + ("determinism_record.json", DETERMINISM_PATH.read_text(encoding="utf-8")), + ): + try: + _guard_output(name, text) + except InvariantError as exc: + problems.append(str(exc)) + + if problems: + print(f"FAIL: {len(problems)} problem(s):", file=sys.stderr) + for p in problems: + print(f" - {p}", file=sys.stderr) + print("Run `python scripts/generate_supply_chain_audit.py` to refresh.", file=sys.stderr) + return 1 + + print( + f"OK: audit artefacts in sync (run_id {audit_index['deterministic_run_id']}, " + f"{audit_index['checked_artifacts_count']} artifacts)." 
+ ) + return 0 + + +def main(argv: list[str]) -> int: + args = argv[1:] + cmd = args[0] if args else "generate" + try: + if cmd == "generate": + return cmd_generate() + if cmd == "check": + return cmd_check() + except InvariantError as exc: + print(f"FAIL (invariant): {exc}", file=sys.stderr) + return 1 + except OSError as exc: + print(f"FAIL (io): {exc}", file=sys.stderr) + return 2 + print(f"unknown command: {cmd!r} (generate|check)", file=sys.stderr) + return 2 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/tests/test_supply_chain_audit.py b/tests/test_supply_chain_audit.py new file mode 100644 index 0000000..5dd24b1 --- /dev/null +++ b/tests/test_supply_chain_audit.py @@ -0,0 +1,158 @@ +"""Tests for the supply-chain audit-trail index + determinism record stage. + +Exercises scripts/generate_supply_chain_audit.py directly (stdlib-only, offline) +against a temporary output directory so the committed artefacts are not touched. + +Covers: + - artefacts are generable and parse as JSON; + - required fields present in both records; + - deterministic_run_id / hash summary stable across two runs (same inputs); + - only the timestamp differs between runs (it lives in non_deterministic_zone); + - no obvious secret/PII in generated artefacts; + - no banned public-claim / codename string in generated artefacts; + - `check` reports in-sync, and detects tampering as drift. 
+""" +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "scripts" / "generate_supply_chain_audit.py" + + +def _load_module(tmp_path: Path): + """Import the generator with its output paths redirected into tmp_path.""" + spec = importlib.util.spec_from_file_location("gen_sc_audit", SCRIPT) + assert spec and spec.loader + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + mod.AUDIT_DIR = tmp_path / "audit" + mod.AUDIT_INDEX_PATH = mod.AUDIT_DIR / "audit_trail_index.json" + mod.DETERMINISM_PATH = mod.AUDIT_DIR / "determinism_record.json" + return mod + + +@pytest.fixture() +def gen(tmp_path): + return _load_module(tmp_path) + + +def test_generate_succeeds_and_parses(gen): + assert gen.cmd_generate() == 0 + audit = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + det = json.loads(gen.DETERMINISM_PATH.read_text(encoding="utf-8")) + assert isinstance(audit, dict) and isinstance(det, dict) + + +def test_audit_index_required_fields(gen): + gen.cmd_generate() + audit = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + for field in ( + "schema_version", + "repo", + "source_commit_sha", + "deterministic_run_id", + "checked_artifacts_count", + "checked_artifacts_hash_summary", + "validation_commands", + "validation_results", + "build_or_audit_events", + "stage_automation", + "notes", + "non_deterministic_zone", + ): + assert field in audit, f"missing {field}" + assert audit["repo"] == "Davincc77/klickdskill" + assert audit["checked_artifacts_count"] == 43 # 42 packs + manifest + assert isinstance(audit["build_or_audit_events"], list) + assert audit["build_or_audit_events"] + # validation_results must be empty by design (generator does not run them). 
+ assert audit["validation_results"] == [] + + +def test_determinism_record_required_fields(gen): + gen.cmd_generate() + det = json.loads(gen.DETERMINISM_PATH.read_text(encoding="utf-8")) + for field in ( + "schema_version", + "hash_algo", + "deterministic_run_id", + "input_files", + "inputs_hash_summary", + "output_files", + "repeatability", + "non_deterministic_zone", + ): + assert field in det, f"missing {field}" + assert det["hash_algo"] == "sha256" + assert len(det["input_files"]) == 43 + for item in det["input_files"]: + assert len(item["sha256"]) == 64 + + +def test_deterministic_run_id_stable_across_runs(gen, tmp_path): + gen.cmd_generate() + first_audit = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + first_det = json.loads(gen.DETERMINISM_PATH.read_text(encoding="utf-8")) + + gen.cmd_generate() # second run, identical inputs + second_audit = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + second_det = json.loads(gen.DETERMINISM_PATH.read_text(encoding="utf-8")) + + assert first_audit["deterministic_run_id"] == second_audit["deterministic_run_id"] + assert first_det["deterministic_run_id"] == second_det["deterministic_run_id"] + assert ( + first_audit["checked_artifacts_hash_summary"] + == second_audit["checked_artifacts_hash_summary"] + ) + + +def test_only_timestamp_is_non_deterministic(gen): + """Two runs must agree on everything outside non_deterministic_zone.""" + gen.cmd_generate() + first = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + gen.cmd_generate() + second = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + first.pop("non_deterministic_zone") + second.pop("non_deterministic_zone") + assert first == second + + +def test_no_banned_claims_or_codename(gen): + gen.cmd_generate() + for path in (gen.AUDIT_INDEX_PATH, gen.DETERMINISM_PATH): + text = path.read_text(encoding="utf-8").lower() + for banned in gen.BANNED_SUBSTRINGS: + assert banned not in text, f"{path.name} contains banned 
{banned!r}" + + +def test_no_obvious_secret_or_pii(gen): + gen.cmd_generate() + for path in (gen.AUDIT_INDEX_PATH, gen.DETERMINISM_PATH): + text = path.read_text(encoding="utf-8") + assert gen._scan_secrets(text) == [], f"{path.name} has secret/PII pattern" + + +def test_check_reports_in_sync_after_generate(gen): + gen.cmd_generate() + assert gen.cmd_check() == 0 + + +def test_check_detects_tampering(gen): + gen.cmd_generate() + audit = json.loads(gen.AUDIT_INDEX_PATH.read_text(encoding="utf-8")) + audit["checked_artifacts_count"] = 999 # tamper inside deterministic core + gen.AUDIT_INDEX_PATH.write_text(json.dumps(audit, indent=2), encoding="utf-8") + assert gen.cmd_check() == 1 + + +def test_check_detects_banned_string_injection(gen): + gen.cmd_generate() + det = json.loads(gen.DETERMINISM_PATH.read_text(encoding="utf-8")) + det["non_deterministic_zone"]["comment"] = "universal standard" # banned + gen.DETERMINISM_PATH.write_text(json.dumps(det, indent=2), encoding="utf-8") + assert gen.cmd_check() == 1 From 75ad577efeb92b657d8b8770a1ebb4ccda713bc1 Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 11:30:16 +0000 Subject: [PATCH 3/8] feat(supply-chain): tool-backed logical diff report for skill/pack candidates Add the logical-diff stage of the x.klickd skill/pack supply chain: a deterministic, offline CLI that compares a previous candidate (--before) against a new one (--after) and classifies governance/guardrail/memory/ evidence/claim/public-boundary changes rather than raw JSON lines. Hard-fails (exit 1) on guardrail lowering, claim-boundary violations, or public/private-boundary violations; exit 0 when no blocking finding. The deterministic_diff_id is a sha256 over input hashes + sorted findings, clock- and host-independent. Stdlib only, no network. Adds 10 before/after fixtures and 17 tests covering each blocking and non-blocking class plus determinism and CLI exit codes. Operator README under .internal-skills/supply-chain/diff/. 
No claim of full end-to-end automation; downstream stages remain planned. Co-Authored-By: Claude Opus 4.7 --- .internal-skills/supply-chain/diff/README.md | 95 +++ scripts/generate_supply_chain_diff.py | 751 ++++++++++++++++++ .../supply_chain_diff/after_benign.json | 103 +++ .../after_claim_violation.json | 99 +++ .../after_evidence_weakened.json | 98 +++ .../after_floor_removed.json | 97 +++ .../after_governance_violation.json | 98 +++ .../after_guardrail_lowered.json | 98 +++ .../after_memory_changed.json | 98 +++ .../after_public_violation.json | 98 +++ .../supply_chain_diff/after_unchanged.json | 98 +++ tests/fixtures/supply_chain_diff/before.json | 60 ++ tests/test_supply_chain_diff.py | 227 ++++++ 13 files changed, 2020 insertions(+) create mode 100644 .internal-skills/supply-chain/diff/README.md create mode 100644 scripts/generate_supply_chain_diff.py create mode 100644 tests/fixtures/supply_chain_diff/after_benign.json create mode 100644 tests/fixtures/supply_chain_diff/after_claim_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_evidence_weakened.json create mode 100644 tests/fixtures/supply_chain_diff/after_floor_removed.json create mode 100644 tests/fixtures/supply_chain_diff/after_governance_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_guardrail_lowered.json create mode 100644 tests/fixtures/supply_chain_diff/after_memory_changed.json create mode 100644 tests/fixtures/supply_chain_diff/after_public_violation.json create mode 100644 tests/fixtures/supply_chain_diff/after_unchanged.json create mode 100644 tests/fixtures/supply_chain_diff/before.json create mode 100644 tests/test_supply_chain_diff.py diff --git a/.internal-skills/supply-chain/diff/README.md b/.internal-skills/supply-chain/diff/README.md new file mode 100644 index 0000000..0488536 --- /dev/null +++ b/.internal-skills/supply-chain/diff/README.md @@ -0,0 +1,95 @@ +# Supply-chain logical diff (stage: diff report) + +This directory holds the 
output of the **logical diff** stage of the x.klickd +skill/pack supply chain. It is one tool-backed stage, not the full pipeline, +and makes **no claim of total automation**. + +## What it does + +`scripts/generate_supply_chain_diff.py` compares a previous version of a +skill/pack candidate (`--before`) against a new version (`--after`) and +classifies the changes that matter for governance, security and claim +discipline — not just raw JSON/text line changes. + +It is meant to help a human/agent reviewer decide: + +- candidate acceptable, +- premium pass required, +- immediate rejection, +- rollback / deprecation required. + +## Usage + +```bash +python scripts/generate_supply_chain_diff.py \ + --before path/to/before.json \ + --after path/to/after.json \ + --out .internal-skills/supply-chain/diff/report.json +``` + +- Prints the report to stdout (suppress with `--quiet`). +- Writes the report to `--out` when given. +- Standard library only. No network, no provider calls, no paid resources. + +## Change classification + +`added`, `removed`, `changed`, `unchanged`, `risk_raised`, +`guardrail_lowered`, `evidence_changed`, `governance_changed`, +`memory_policy_changed`, `public_boundary_changed`, `claim_boundary_changed`. + +## Exit codes + +| Code | Meaning | +|---|---| +| 0 | no blocking finding | +| 1 | at least one **blocking** finding (guardrail lowered, claim-boundary, or public/private-boundary violation) | +| 2 | usage / input error (missing or unparseable input) | + +## Blocking (hard-fail) conditions + +Per the supply-chain rules, any **lowering of a non-lowerable safeguard** is a +hard fail rather than a silent change. 
Blocking findings include: + +- a verification gate weakened (`block` → `confirm` → `silent`) or removed; +- a `human_veto.non_lowerable_floor` entry removed, or `raise_only` disabled; +- `evidence_policy.required_for_claims` / `pointer_only` turned off; +- `human_authority.final_decision_owner` moved off `human_carrier`; +- `_pack_metadata.claims_v41_ga` flipped to `true`, or `non_normative` dropped; +- a banned public claim introduced (e.g. "universal standard", "automatic + GDPR / EU AI Act compliance", "proven benchmark superiority"); +- an internal codename leaking into the candidate; +- `contains_real_pii` / `contains_secrets` flipped to `true`; +- the `encrypted` flag downgraded `true` → `false`; +- a `forbidden_fields` entry removed. + +Non-blocking but flagged: memory-policy changes, evidence-policy shape +changes, agent-role escalation (`risk_raised`), and generic added/removed/ +changed pack keys. + +## Determinism + +`deterministic_diff_id` is a `sha256:` over the before/after input hashes plus +the sorted, normalized findings. It does not depend on the clock, host, or run +order. Two runs over identical inputs produce an identical id. Any clock-based +marker a caller adds lives in `non_deterministic_zone` and is excluded from the +hash. + +## Tool-vs-planned matrix (this stage) + +| Capability | State | +|---|---| +| logical diff classification (governance/guardrail/memory/evidence/claim/public boundary) | **tool** (this stage) | +| deterministic diff id + report | **tool** (this stage) | +| hard-fail on guardrail lowering / claim / public-boundary violation | **tool** (this stage) | +| threat model, license check, source-freshness, full PII/secrets scanner | **planned** | +| candidate-skill generation, context-graph generation | **planned** | +| premium pass | **manual** (human/agent, post-diff) | + +## Known limits + +- The diff understands the documented x.klickd pack shape. 
Renamed or + restructured roots are reported as generic `changed` rather than mapped to a + semantic class. +- The banned-claim and codename checks are substring tripwires on the + candidate document, not a general-purpose PII/secrets scanner. +- This stage does not generate, promote, release, tag or publish anything. diff --git a/scripts/generate_supply_chain_diff.py b/scripts/generate_supply_chain_diff.py new file mode 100644 index 0000000..171e2ea --- /dev/null +++ b/scripts/generate_supply_chain_diff.py @@ -0,0 +1,751 @@ +#!/usr/bin/env python3 +"""Offline logical-diff CLI for x.klickd skill/pack candidates. + +This is the supply-chain *logical diff* stage. It compares a previous version +(``--before``) of a skill/pack candidate against a new version (``--after``) +and classifies the changes that *matter* for governance, security and claim +discipline -- not just raw JSON/text line changes. + +It is NOT a full end-to-end supply chain and makes no claim of total +automation. It is one tool-backed stage: it produces a deterministic report +that helps a human/agent reviewer decide whether a candidate is acceptable, +needs a premium pass, must be rejected, or requires rollback/deprecation. 
+ +Scope of what it understands (semantic, not just textual): + + * governance -- human_veto / human_authority / gate policy + * guardrails -- gate levels (block > confirm > silent), raise_only, + claim_grounding_required, non_lowerable_floor + * memory policy -- memory_scope / memory_segments[].policy / + structured_memory.policy + * evidence / proofs -- evidence_policy (required_for_claims, pointer_only) + * claim boundary -- _pack_metadata.claims_v41_ga / non_normative / + contains_real_pii / contains_secrets + banned claim + strings introduced anywhere in the after document + * public/private -- internal codename leak, encrypted flag downgrade, + forbidden_fields removal + * competencies -- added/removed competency refs + * risk -- risk markers raised + +Change classification (per the brief): + + added, removed, changed, unchanged, risk_raised, guardrail_lowered, + evidence_changed, governance_changed, memory_policy_changed, + public_boundary_changed, claim_boundary_changed + +Exit codes: + + 0 no blocking finding + 1 at least one BLOCKING finding (guardrail lowered, claim-boundary + violation, or public/private-boundary violation) + 2 usage / input error (cannot read or parse an input) + +Determinism: the report's ``deterministic_diff_id`` is a sha256 over the +before/after input hashes plus the sorted, normalized findings. It does not +depend on the clock, host, or run order. A ``generated_at`` field, if present, +lives in ``non_deterministic_zone`` and is excluded from every hash. + +No network. No provider calls. No paid resources. Standard library only. +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = "supply-chain-diff/0.1" + +# --- claim discipline ------------------------------------------------------- +# Substrings that must never be *introduced* by a candidate. Matching is +# case-insensitive over the serialized after-document. 
def _read_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 encoded JSON and return its top-level object.

    Args:
        path: file to read.

    Returns:
        The decoded top-level JSON object.

    Raises:
        OSError: the file cannot be read.
        UnicodeDecodeError: the bytes are not valid UTF-8.
        json.JSONDecodeError: the text is not valid JSON, or its top-level
            value is not a JSON object.
    """
    text = path.read_bytes().decode("utf-8")
    doc = json.loads(text)
    if not isinstance(doc, dict):
        # The declared return type is a dict and callers use it as a mapping;
        # a list/scalar root would otherwise fail later with an
        # AttributeError. Reuse JSONDecodeError -- an error this reader
        # already raises for malformed input -- so existing handlers for
        # unparseable input also cover this case.
        raise json.JSONDecodeError(
            f"top-level JSON value must be an object, got {type(doc).__name__}",
            text,
            0,
        )
    return doc
def _diff_gates(
    before_pack: dict[str, Any], after_pack: dict[str, Any]
) -> list[dict[str, Any]]:
    """Classify verification-gate changes between two packs, matched by gate id.

    Findings produced per gate id:

    * present before, absent after      -> ``guardrail_lowered`` (blocking)
    * absent before, present after      -> ``added``
    * level moved to a weaker rank      -> ``guardrail_lowered`` (blocking)
    * level moved to a value that is not a key of ``GATE_LEVEL_ORDER``
                                        -> ``guardrail_lowered`` (blocking)
    * level moved to a stronger rank    -> ``governance_changed``
    * level text changed, rank equal    -> ``governance_changed``

    A gate whose ``level`` is equal in both packs produces no finding.

    Args:
        before_pack: pack body of the previous version.
        after_pack: pack body of the candidate version.

    Returns:
        The findings, in ascending gate-id order.
    """
    findings: list[dict[str, Any]] = []
    bg = _gates_by_id(before_pack)
    ag = _gates_by_id(after_pack)

    for gid in sorted(set(bg) | set(ag)):
        b = bg.get(gid)
        a = ag.get(gid)
        gate_path = f"verification_gates.gates[{gid}]"
        if b is not None and a is None:
            # A removed gate is a removed guardrail -> lowering.
            findings.append(
                _finding(
                    "guardrail_lowered",
                    gate_path,
                    before=b.get("level"),
                    after=None,
                    severity="high",
                    blocking=True,
                    detail=f"gate {gid!r} removed",
                )
            )
            continue
        if a is not None and b is None:
            findings.append(
                _finding(
                    "added",
                    gate_path,
                    before=None,
                    after=a.get("level"),
                    severity="info",
                    detail=f"gate {gid!r} added",
                )
            )
            continue

        bl, al = b.get("level"), a.get("level")
        if bl == al:
            continue

        path = f"{gate_path}.level"
        before_rank = _gate_strength(bl)
        after_rank = _gate_strength(al)
        if after_rank > before_rank:
            findings.append(
                _finding(
                    "guardrail_lowered",
                    path,
                    before=bl,
                    after=al,
                    severity="high",
                    blocking=True,
                    detail=f"gate {gid!r} weakened {bl!r} -> {al!r}",
                )
            )
        elif str(al).lower() not in GATE_LEVEL_ORDER:
            # The rank of an unrecognized level is only a fallback guess, so
            # it cannot justify a "strengthened" or "same strength" verdict.
            # Fail closed and let a reviewer decide.
            findings.append(
                _finding(
                    "guardrail_lowered",
                    path,
                    before=bl,
                    after=al,
                    severity="high",
                    blocking=True,
                    detail=f"gate {gid!r} changed to unrecognized level {bl!r} -> {al!r}",
                )
            )
        elif after_rank < before_rank:
            findings.append(
                _finding(
                    "governance_changed",
                    path,
                    before=bl,
                    after=al,
                    severity="info",
                    detail=f"gate {gid!r} strengthened {bl!r} -> {al!r}",
                )
            )
        else:
            # Different spelling, identical rank (e.g. a case-only change):
            # report the edit without claiming the gate got stronger.
            findings.append(
                _finding(
                    "governance_changed",
                    path,
                    before=bl,
                    after=al,
                    severity="info",
                    detail=f"gate {gid!r} level changed {bl!r} -> {al!r} (same strength)",
                )
            )
    return findings
findings.append( + _finding( + "guardrail_lowered", + "human_veto.non_lowerable_floor", + before=removed, + after=None, + severity="high", + blocking=True, + detail=f"non-lowerable floor entry {removed!r} removed", + ) + ) + for added in sorted(a_floor - b_floor): + findings.append( + _finding( + "governance_changed", + "human_veto.non_lowerable_floor", + before=None, + after=added, + severity="info", + detail=f"floor entry {added!r} added", + ) + ) + # raise_only must not go true -> false. + if bv.get("raise_only") is True and av.get("raise_only") is False: + findings.append( + _finding( + "guardrail_lowered", + "human_veto.raise_only", + before=True, + after=False, + severity="high", + blocking=True, + detail="raise_only disabled", + ) + ) + + # gates.verification_gates_default flags. + bd = (before_pack.get("gates") or {}).get("verification_gates_default") or {} + ad = (after_pack.get("gates") or {}).get("verification_gates_default") or {} + for flag in ("raise_only", "claim_grounding_required"): + if bd.get(flag) is True and ad.get(flag) is False: + findings.append( + _finding( + "guardrail_lowered", + f"gates.verification_gates_default.{flag}", + before=True, + after=False, + severity="high", + blocking=True, + detail=f"{flag} disabled", + ) + ) + return findings + + +def _diff_governance( + before_pack: dict[str, Any], after_pack: dict[str, Any] +) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + ba = before_pack.get("human_authority") or {} + aa = after_pack.get("human_authority") or {} + # Final decision must remain human-owned. 
def _diff_evidence(
    before_pack: dict[str, Any], after_pack: dict[str, Any]
) -> list[dict[str, Any]]:
    """Classify changes to ``evidence_policy`` between two packs.

    Returns no findings when the two policies compare equal. Otherwise:

    * for each of ``required_for_claims`` and ``pointer_only`` that was
      ``True`` before and is no longer ``True`` after (set to ``False``,
      set to another value, or dropped together with or without the rest of
      the policy), a blocking ``guardrail_lowered`` finding is emitted;
    * one non-blocking ``evidence_changed`` finding carrying both policies is
      always appended.

    Args:
        before_pack: pack body of the previous version.
        after_pack: pack body of the candidate version.

    Returns:
        The findings for the evidence policy.
    """
    findings: list[dict[str, Any]] = []
    be = before_pack.get("evidence_policy") or {}
    ae = after_pack.get("evidence_policy") or {}
    if be == ae:
        return findings
    # Specific weakenings are blocking; other shape changes are just flagged.
    for flag in ("required_for_claims", "pointer_only"):
        after_value = ae.get(flag)
        # "is not True" rather than "is False": deleting the flag removes the
        # safeguard just as much as switching it off does.
        if be.get(flag) is True and after_value is not True:
            if after_value is False:
                detail = f"evidence policy {flag} disabled"
            else:
                detail = f"evidence policy {flag} removed or no longer true"
            findings.append(
                _finding(
                    "guardrail_lowered",
                    f"evidence_policy.{flag}",
                    before=True,
                    after=after_value,
                    severity="high",
                    blocking=True,
                    detail=detail,
                )
            )
    findings.append(
        _finding(
            "evidence_changed",
            "evidence_policy",
            before=be,
            after=ae,
            severity="medium",
            detail="evidence policy shape changed",
        )
    )
    return findings
def _diff_claim_boundary(
    before_doc: dict[str, Any], after_doc: dict[str, Any]
) -> list[dict[str, Any]]:
    """Flag changes that widen what the candidate claims about itself.

    Three checks, each producing a blocking ``claim_boundary_changed``
    finding:

    * ``_pack_metadata.claims_v41_ga`` was ``False`` or absent and is now
      ``True``;
    * ``_pack_metadata.non_normative`` was ``True`` and is no longer ``True``
      (set to ``False`` or dropped from the metadata);
    * an entry of ``BANNED_CLAIM_SUBSTRINGS`` occurs in the lower-cased JSON
      serialization of the after document but not in that of the before
      document.

    Args:
        before_doc: full previous document (not just the pack body).
        after_doc: full candidate document.

    Returns:
        The claim-boundary findings.
    """
    findings: list[dict[str, Any]] = []
    bm = _meta(before_doc)
    am = _meta(after_doc)

    # claims_v41_ga flipping false -> true is a claim-boundary violation.
    if bm.get("claims_v41_ga") in (False, None) and am.get("claims_v41_ga") is True:
        findings.append(
            _finding(
                "claim_boundary_changed",
                "_pack_metadata.claims_v41_ga",
                before=bm.get("claims_v41_ga"),
                after=True,
                severity="high",
                blocking=True,
                detail="candidate now claims v4.1 GA",
            )
        )
    # Losing non_normative escalates a candidate to a normative claim. Compare
    # with "is not True" so that deleting the key counts, not only an
    # explicit false.
    after_non_normative = am.get("non_normative")
    if bm.get("non_normative") is True and after_non_normative is not True:
        findings.append(
            _finding(
                "claim_boundary_changed",
                "_pack_metadata.non_normative",
                before=True,
                after=after_non_normative,
                severity="high",
                blocking=True,
                detail="candidate dropped non_normative flag",
            )
        )

    # Banned claim substrings newly introduced anywhere in the after document.
    after_blob = json.dumps(after_doc, ensure_ascii=False).lower()
    before_blob = json.dumps(before_doc, ensure_ascii=False).lower()
    for needle in BANNED_CLAIM_SUBSTRINGS:
        if needle in after_blob and needle not in before_blob:
            findings.append(
                _finding(
                    "claim_boundary_changed",
                    "document",
                    after=needle,
                    severity="high",
                    blocking=True,
                    detail=f"banned claim introduced: {needle!r}",
                )
            )
    return findings
def _normalize_for_hash(finding: dict[str, Any]) -> str:
    """Serialize the hash-relevant fields of *finding* to a canonical JSON string.

    Only ``kind``, ``path``, ``before``, ``after`` and ``blocking`` are kept;
    every other key of the finding is left out of the string. Keys are
    emitted in sorted order, non-ASCII text is written as-is, and values the
    JSON encoder cannot handle are converted with ``str``.

    Raises:
        KeyError: if *finding* lacks one of the five fields.
    """
    hashed_fields = ("kind", "path", "before", "after", "blocking")
    projection: dict[str, Any] = {}
    for name in hashed_fields:
        projection[name] = finding[name]
    return json.dumps(projection, default=str, ensure_ascii=False, sort_keys=True)
+ findings += _diff_evidence(before_pack, after_pack) + findings += _diff_memory(before_pack, after_pack) + findings += _diff_competencies(before_pack, after_pack) + findings += _diff_claim_boundary(before_doc, after_doc) + findings += _diff_public_boundary(before_doc, after_doc) + findings += _generic_changed(before_pack, after_pack) + + # Deterministic ordering: sort by (kind, path, detail). + findings.sort(key=lambda f: (f["kind"], f["path"], f["detail"])) + + before_hash = _sha256_file(before_path) + after_hash = _sha256_file(after_path) + + summary: dict[str, int] = {} + for f in findings: + summary[f["kind"]] = summary.get(f["kind"], 0) + 1 + if not findings: + summary["unchanged"] = 1 + + blocked = [f for f in findings if f["blocking"]] + high_risk = [f for f in findings if f["severity"] == "high" and not f["blocking"]] + + diff_id_material = json.dumps( + { + "before_hash": before_hash, + "after_hash": after_hash, + "findings": [_normalize_for_hash(f) for f in findings], + }, + sort_keys=True, + ensure_ascii=False, + ) + deterministic_diff_id = "sha256:" + hashlib.sha256( + diff_id_material.encode("utf-8") + ).hexdigest() + + recommendations = _recommend(blocked, high_risk, findings) + + return { + "schema_version": SCHEMA_VERSION, + "before_path": str(before_path), + "after_path": str(after_path), + "before_hash": before_hash, + "after_hash": after_hash, + "deterministic_diff_id": deterministic_diff_id, + "summary": dict(sorted(summary.items())), + "changed_paths": sorted({f["path"] for f in findings if f["kind"] != "unchanged"}), + "findings": findings, + "high_risk_findings": high_risk, + "blocked_findings": blocked, + "recommendations": recommendations, + "non_deterministic_zone": { + "note": ( + "Fields here are excluded from deterministic_diff_id. None are " + "emitted by default; a generated_at marker may be added by a " + "caller without affecting the diff id." 
def main(argv: list[str]) -> int:
    """Run the logical-diff CLI and return the process exit code.

    Args:
        argv: full argument vector; ``argv[0]`` (the program name) is skipped.

    Returns:
        ``0`` when the report has no blocking finding, ``1`` when it has at
        least one, ``2`` when an input is missing, is not a regular file,
        cannot be read, or cannot be parsed.

    The rendered report is written to ``--out`` when given and printed to
    stdout unless ``--quiet`` is set.
    """
    parser = argparse.ArgumentParser(
        description="Logical diff between two x.klickd skill/pack candidates."
    )
    parser.add_argument("--before", required=True, help="path to previous version JSON")
    parser.add_argument("--after", required=True, help="path to candidate version JSON")
    parser.add_argument(
        "--out",
        default=None,
        help="optional path to write the deterministic JSON report",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="suppress the report on stdout (still writes --out if given)",
    )
    args = parser.parse_args(argv[1:])

    before_path = Path(args.before)
    after_path = Path(args.after)
    for p in (before_path, after_path):
        if not p.exists():
            print(f"error: input not found: {p}", file=sys.stderr)
            return 2
        if not p.is_file():
            # A directory (or other non-file) passes exists() but cannot be
            # read; report it as an input error instead of crashing.
            print(f"error: input is not a regular file: {p}", file=sys.stderr)
            return 2
    try:
        report = build_report(before_path, after_path)
    except OSError as exc:
        # Unreadable input must yield exit code 2; an uncaught exception would
        # end the process with status 1, the code reserved for blocking
        # findings.
        print(f"error: could not read input: {exc}", file=sys.stderr)
        return 2
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f"error: could not parse input JSON: {exc}", file=sys.stderr)
        return 2

    rendered = json.dumps(report, indent=2, ensure_ascii=False, sort_keys=False)
    if args.out:
        out_path = Path(args.out)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(rendered + "\n", encoding="utf-8")
    if not args.quiet:
        print(rendered)

    if report["blocked_findings"]:
        print(
            f"BLOCKED: {len(report['blocked_findings'])} blocking finding(s).",
            file=sys.stderr,
        )
        return 1
    return 0
"x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.2.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + }, + { + "competency_ref": "esco:S2.0", + "scheme": "esco", + "prefLabel": "gamma" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_claim_violation.json b/tests/fixtures/supply_chain_diff/after_claim_violation.json 
new file mode 100644 index 0000000..98931be --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_claim_violation.json @@ -0,0 +1,99 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": true, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": 
"risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ], + "target_user": "Now a universal standard with automatic GDPR compliance." + } +} diff --git a/tests/fixtures/supply_chain_diff/after_evidence_weakened.json b/tests/fixtures/supply_chain_diff/after_evidence_weakened.json new file mode 100644 index 0000000..3c5dc86 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_evidence_weakened.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + 
"final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": false, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_floor_removed.json b/tests/fixtures/supply_chain_diff/after_floor_removed.json new file mode 100644 index 0000000..28ed86c --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_floor_removed.json @@ -0,0 +1,97 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + 
"scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": false, + "non_lowerable_floor": [ + "risk_class_below_evidence" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_governance_violation.json b/tests/fixtures/supply_chain_diff/after_governance_violation.json new file mode 100644 index 0000000..953243f --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_governance_violation.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": 
"0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "agent", + "agent_role": "autonomous", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json b/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json new file mode 100644 index 0000000..912d0ae --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_guardrail_lowered.json @@ -0,0 +1,98 @@ +{ + 
"klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "confirm", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": 
"skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_memory_changed.json b/tests/fixtures/supply_chain_diff/after_memory_changed.json new file mode 100644 index 0000000..346fa00 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_memory_changed.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "store_inline" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "shared_global", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": 
"rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/after_public_violation.json b/tests/fixtures/supply_chain_diff/after_public_violation.json new file mode 100644 index 0000000..a0f1365 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_public_violation.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": true, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" 
+ } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "scoring_rubric", + "tone_rules" + ], + "internal_note": "see chimera internal docs" + } +} diff --git a/tests/fixtures/supply_chain_diff/after_unchanged.json b/tests/fixtures/supply_chain_diff/after_unchanged.json new file mode 100644 index 0000000..bfde8b2 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/after_unchanged.json @@ -0,0 +1,98 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + "claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + { + "competency_ref": "esco:S1.0", + "scheme": "esco", + "prefLabel": "alpha" + }, + { + "competency_ref": "esco:S1.5", + "scheme": "esco", + "prefLabel": "beta" + } + ], 
+ "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + { + "id": "works", + "scope": "memory.x_klickd.example.works", + "policy": "pointer_only_attribution" + } + ], + "structured_memory": { + "scope": "memory.x_klickd.example", + "policy": "pack_scoped_only", + "entries": [] + }, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": [ + "risk_class_below_evidence", + "skip_counsel_for_definitive_decision" + ] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + { + "id": "compliance-attestation", + "action_class": "compliance_attestation", + "level": "confirm", + "reason": "confirm required" + }, + { + "id": "risk-class-downgrade", + "action_class": "risk_class_below_evidence", + "level": "block", + "reason": "blocked" + }, + { + "id": "counsel-escalation-skip", + "action_class": "skip_counsel_for_definitive_decision", + "level": "block", + "reason": "blocked" + } + ] + }, + "forbidden_fields": [ + "system_prompt_overrides", + "scoring_rubric", + "tone_rules" + ] + } +} diff --git a/tests/fixtures/supply_chain_diff/before.json b/tests/fixtures/supply_chain_diff/before.json new file mode 100644 index 0000000..ab267d5 --- /dev/null +++ b/tests/fixtures/supply_chain_diff/before.json @@ -0,0 +1,60 @@ +{ + "klickd_version": "4.0", + "payload_schema_version": "4.0.0-preview.1", + "encrypted": false, + "domain": "compliance", + "profile_kind": "carrier_competency_pack", + "_pack_metadata": { + "kind": "x_klickd_candidate_skill_pack", + "non_normative": true, + 
"claims_v41_ga": false, + "contains_real_pii": false, + "contains_secrets": false, + "status": "candidate_mapped" + }, + "x_klickd_pack": { + "pack": "x.klickd/example", + "pack_version": "0.1.0-candidate", + "competencies": [ + {"competency_ref": "esco:S1.0", "scheme": "esco", "prefLabel": "alpha"}, + {"competency_ref": "esco:S1.5", "scheme": "esco", "prefLabel": "beta"} + ], + "memory_scope": "memory.x_klickd.example", + "memory_segments": [ + {"id": "works", "scope": "memory.x_klickd.example.works", "policy": "pointer_only_attribution"} + ], + "structured_memory": {"scope": "memory.x_klickd.example", "policy": "pack_scoped_only", "entries": []}, + "gates": { + "verification_gates_default": { + "raise_only": true, + "claim_grounding_required": true, + "reversibility_threshold": "low" + } + }, + "human_veto": { + "owner": "human_carrier", + "raise_only": true, + "non_lowerable_floor": ["risk_class_below_evidence", "skip_counsel_for_definitive_decision"] + }, + "human_authority": { + "final_decision_owner": "human_carrier", + "agent_role": "advisory", + "escalation": "qualified_specialist_per_domain" + }, + "evidence_policy": { + "required_for_claims": true, + "pointer_only": true, + "attestation_shape_ref": "rfc-002#8b" + }, + "verification_gates": { + "version": 1, + "user_default": "silent", + "gates": [ + {"id": "compliance-attestation", "action_class": "compliance_attestation", "level": "confirm", "reason": "confirm required"}, + {"id": "risk-class-downgrade", "action_class": "risk_class_below_evidence", "level": "block", "reason": "blocked"}, + {"id": "counsel-escalation-skip", "action_class": "skip_counsel_for_definitive_decision", "level": "block", "reason": "blocked"} + ] + }, + "forbidden_fields": ["system_prompt_overrides", "scoring_rubric", "tone_rules"] + } +} diff --git a/tests/test_supply_chain_diff.py b/tests/test_supply_chain_diff.py new file mode 100644 index 0000000..5df64fb --- /dev/null +++ b/tests/test_supply_chain_diff.py @@ -0,0 +1,227 @@ 
+"""Tests for scripts/generate_supply_chain_diff.py. + +The logical-diff stage compares a previous skill/pack candidate against a new +one and classifies the changes that matter for governance, security and claim +discipline. These tests exercise the static fixtures under +tests/fixtures/supply_chain_diff/ plus a couple of in-memory edge cases. +""" +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path + +import pytest + +REPO = Path(__file__).resolve().parent.parent +SCRIPT = REPO / "scripts" / "generate_supply_chain_diff.py" +FIX = REPO / "tests" / "fixtures" / "supply_chain_diff" + + +def _load_module(): + spec = importlib.util.spec_from_file_location("generate_supply_chain_diff", SCRIPT) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + sys.modules["generate_supply_chain_diff"] = mod + spec.loader.exec_module(mod) + return mod + + +diff = _load_module() + +BEFORE = FIX / "before.json" + + +def _report(after_name: str) -> dict: + return diff.build_report(BEFORE, FIX / after_name) + + +def _kinds(report: dict) -> set[str]: + return {f["kind"] for f in report["findings"]} + + +# --- structural ------------------------------------------------------------ + + +def test_report_is_valid_json_and_has_required_fields(): + report = _report("after_benign.json") + rendered = json.dumps(report) # must serialize + parsed = json.loads(rendered) + for field in ( + "schema_version", + "before_path", + "after_path", + "before_hash", + "after_hash", + "deterministic_diff_id", + "summary", + "changed_paths", + "findings", + "high_risk_findings", + "blocked_findings", + "recommendations", + "non_deterministic_zone", + ): + assert field in parsed, f"missing field {field}" + assert parsed["deterministic_diff_id"].startswith("sha256:") + + +def test_unchanged_file_yields_stable_empty_diff(): + report = _report("after_unchanged.json") + assert report["findings"] == [] + assert 
report["changed_paths"] == [] + assert report["summary"] == {"unchanged": 1} + assert report["blocked_findings"] == [] + assert "UNCHANGED" in report["recommendations"][0] + + +def test_added_removed_changed_detected(): + report = _report("after_benign.json") + # added competency esco:S2.0 ; pack_version changed (generic) ; non-blocking + assert "added" in _kinds(report) + assert report["blocked_findings"] == [] + paths = report["changed_paths"] + assert any("competencies[esco:S2.0]" in p for p in paths) + assert any("pack_version" in p for p in paths) + + +# --- guardrails (blocking) ------------------------------------------------- + + +def test_guardrail_lowering_gate_level_blocks(): + report = _report("after_guardrail_lowered.json") + blocked = report["blocked_findings"] + assert blocked, "expected a blocking finding" + assert all(f["blocking"] for f in blocked) + assert any(f["kind"] == "guardrail_lowered" for f in blocked) + assert "REJECT_OR_ROLLBACK" in report["recommendations"][0] + + +def test_non_lowerable_floor_removal_and_raise_only_block(): + report = _report("after_floor_removed.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("non-lowerable floor" in d for d in details) + assert any("raise_only disabled" in d for d in details) + + +def test_evidence_weakening_blocks_and_flags_change(): + report = _report("after_evidence_weakened.json") + kinds = _kinds(report) + assert "evidence_changed" in kinds + assert any( + f["kind"] == "guardrail_lowered" and "evidence" in f["detail"] + for f in report["blocked_findings"] + ) + + +def test_governance_owner_move_off_human_blocks(): + report = _report("after_governance_violation.json") + assert any( + f["kind"] == "guardrail_lowered" + and "final_decision_owner" in f["path"] + for f in report["blocked_findings"] + ) + # agent_role advisory -> autonomous is risk_raised (non-blocking) + assert any(f["kind"] == "risk_raised" for f in report["findings"]) + + +# --- claim / public 
boundary (blocking) ------------------------------------ + + +def test_claim_boundary_violation_detected(): + report = _report("after_claim_violation.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("claims v4.1 GA" in d for d in details) + assert any("banned claim introduced" in d for d in details) + assert "claim_boundary_changed" in _kinds(report) + + +def test_public_boundary_violation_detected(): + report = _report("after_public_violation.json") + details = [f["detail"] for f in report["blocked_findings"]] + assert any("codename leaked" in d for d in details) + assert any("contains_real_pii now true" in d for d in details) + assert any("forbidden_fields entry" in d for d in details) + + +def test_memory_policy_change_flagged_non_blocking(): + report = _report("after_memory_changed.json") + assert "memory_policy_changed" in _kinds(report) + assert report["blocked_findings"] == [] + assert "PREMIUM_PASS_REQUIRED" not in report["recommendations"][0] + + +# --- determinism ----------------------------------------------------------- + + +def test_diff_id_is_deterministic_across_runs(): + r1 = _report("after_guardrail_lowered.json") + r2 = _report("after_guardrail_lowered.json") + assert r1["deterministic_diff_id"] == r2["deterministic_diff_id"] + assert r1["findings"] == r2["findings"] + + +def test_diff_id_changes_when_content_changes(): + r1 = _report("after_benign.json") + r2 = _report("after_guardrail_lowered.json") + assert r1["deterministic_diff_id"] != r2["deterministic_diff_id"] + + +# --- CLI exit codes -------------------------------------------------------- + + +def test_cli_exit_zero_on_unchanged(): + rc = diff.main( + ["prog", "--before", str(BEFORE), "--after", str(FIX / "after_unchanged.json"), "--quiet"] + ) + assert rc == 0 + + +def test_cli_exit_one_on_blocking(): + rc = diff.main( + [ + "prog", + "--before", + str(BEFORE), + "--after", + str(FIX / "after_guardrail_lowered.json"), + "--quiet", + ] + ) + assert rc 
== 1 + + +def test_cli_exit_two_on_missing_input(tmp_path): + rc = diff.main(["prog", "--before", str(BEFORE), "--after", str(tmp_path / "nope.json"), "--quiet"]) + assert rc == 2 + + +def test_cli_writes_out_file(tmp_path): + out = tmp_path / "nested" / "report.json" + diff.main( + [ + "prog", + "--before", + str(BEFORE), + "--after", + str(FIX / "after_benign.json"), + "--out", + str(out), + "--quiet", + ] + ) + assert out.exists() + parsed = json.loads(out.read_text()) + assert parsed["schema_version"] == diff.SCHEMA_VERSION + + +# --- artifacts hygiene ----------------------------------------------------- + + +def test_no_codename_leak_in_script_output_for_clean_input(): + # A clean before/after pair must not surface the internal codename in the + # report unless the candidate itself leaked it. + report = _report("after_benign.json") + blob = json.dumps(report).lower() + assert diff.INTERNAL_CODENAME not in blob From ccc8d58a620cad15edfc2a9337bf9c9e42973060 Mon Sep 17 00:00:00 2001 From: klickd-supply-chain-agent Date: Tue, 2 Jun 2026 11:40:12 +0000 Subject: [PATCH 4/8] add supply-chain source freshness + license check stage Implements pipeline steps 11 (license check) and 12 (source freshness) as a real, stdlib-only, offline CLI that triages skill/candidate source manifests. Classifies licenses (allowed/review/blocked/unknown) and freshness (fresh/review/stale/missing_date) for internal review, verifies local-file hashes, flags missing/non-https URLs, and emits a deterministic JSON report. Exits non-zero on blocking findings. Internal triage only: no legal advice, no compliance claim, no network I/O, no candidate generation. Independent of PRs #115/#116/#117. 
Co-Authored-By: Claude Opus 4.7 --- .../supply-chain/source-check/README.md | 134 +++++ .../source-check/example_source_manifest.json | 15 + scripts/check_supply_chain_sources.py | 522 ++++++++++++++++++ .../data/local_source.txt | 2 + .../manifest_academic_superseded.json | 26 + .../manifest_hash_match.json | 16 + .../manifest_hash_mismatch.json | 16 + .../manifest_license_blocked.json | 14 + .../manifest_license_unknown.json | 14 + .../manifest_missing_fields.json | 12 + .../manifest_missing_security_date.json | 14 + .../manifest_noncommercial_premium.json | 14 + .../supply_chain_sources/manifest_ok.json | 23 + .../manifest_stale_reference.json | 15 + .../manifest_stale_security.json | 15 + tests/test_supply_chain_sources.py | 242 ++++++++ 16 files changed, 1094 insertions(+) create mode 100644 .internal-skills/supply-chain/source-check/README.md create mode 100644 .internal-skills/supply-chain/source-check/example_source_manifest.json create mode 100644 scripts/check_supply_chain_sources.py create mode 100644 tests/fixtures/supply_chain_sources/data/local_source.txt create mode 100644 tests/fixtures/supply_chain_sources/manifest_academic_superseded.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_hash_match.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_hash_mismatch.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_license_blocked.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_license_unknown.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_missing_fields.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_missing_security_date.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_noncommercial_premium.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_ok.json create mode 100644 tests/fixtures/supply_chain_sources/manifest_stale_reference.json create mode 100644 
tests/fixtures/supply_chain_sources/manifest_stale_security.json create mode 100644 tests/test_supply_chain_sources.py diff --git a/.internal-skills/supply-chain/source-check/README.md b/.internal-skills/supply-chain/source-check/README.md new file mode 100644 index 0000000..866ba9c --- /dev/null +++ b/.internal-skills/supply-chain/source-check/README.md @@ -0,0 +1,134 @@ +# Supply-chain stage: source freshness + license check + +Internal operator note for `scripts/check_supply_chain_sources.py`. This stage +covers pipeline steps **11 (license check)** and **12 (source freshness)** for +the inputs that feed a skill / candidate build. It is an internal triage tool. + +**It is not legal advice and makes no compliance claim.** It classifies sources +into review buckets so a human/agent can decide; it never asserts that a source +*is* legally compatible. + +## Usage + +```bash +python scripts/check_supply_chain_sources.py \ + --manifest path/to/source_manifest.json \ + --out .internal-skills/supply-chain/source-check/report.json +``` + +Flags: + +- `--manifest` (required) — source manifest JSON (`xklickd.source_manifest.v0.1`). +- `--out` — write the deterministic JSON report to this path. +- `--quiet` — suppress stdout (report still written to `--out`). +- `--eval-date YYYY-MM-DD` — date used for age math. Set this in tests/CI for + reproducible freshness classification; defaults to today (UTC). +- `--min-metadata-fields N` — minimum descriptive fields per source (default 3). + +Stdlib-only, offline, no network I/O. 
+ +## Manifest shape (`xklickd.source_manifest.v0.1`) + +```json +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-001", + "title": "Example", + "url": "https://example.org/spec", + "retrieved_at": "2026-06-02", + "published_at": "2026-01-01", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "default", + "local_path": "data/file.txt", + "hash": "sha256:...", + "superseded": false, + "url_exempt": false + } + ] +} +``` + +Required per source: `id`, `title`, `license`, `usage`. Optional: `url`, +`published_at`, `retrieved_at`, `category`, `local_path` + `hash`, `superseded`, +`url_exempt`. A missing `url` is still a blocking finding unless the source sets `url_exempt` (see "Blocking conditions"). + +## Classification + +License buckets (normalized, alias-tolerant): + +- **allowed**: MIT, Apache-2.0, BSD-2-Clause, BSD-3-Clause, CC0-1.0, CC-BY-4.0 +- **review**: CC-BY-SA-4.0, MPL-2.0, GPL-2.0, GPL-3.0, AGPL-3.0, custom, unknown +- **blocked**: proprietary-no-permission, no-redistribution, all-rights-reserved, + non-commercial-only (for commercial/premium reuse) +- **unknown**: anything unrecognized → review + +Freshness buckets (age budget by `category`, parameterizable in the script): + +- default review budget: 365 days +- security / regulatory: 90 days +- academic / theory: 1095 days (drops to 365 when `superseded: true`) + +Within budget → `fresh`; over budget but ≤ 2× → `review`; beyond → `stale`; +no `published_at` → `missing_date`. + +## Blocking conditions (exit 1) + +- a blocked license; +- a non-commercial license used for a commercial/premium `usage`; +- a missing `url`, or a non-https (`http://`) url, unless the source sets `url_exempt`; +- `missing_date` or `stale` for a `security`/`regulatory` source (critical); +- a referenced `local_path` that is missing or whose `hash` does not match; +- insufficient metadata (fewer than `--min-metadata-fields` descriptive fields); +- duplicate source `id`.
+ +Non-blocking → `review` for review/unknown licenses and for future-dated or +past-budget non-critical sources; a `local_path` with no declared `hash` is only noted as a finding. + +Exit codes: `0` clean, `1` one or more blocking findings, `2` usage / I/O / bad +schema. + +## Report fields + +`schema_version`, `manifest_path`, `manifest_hash`, `deterministic_report_id`, +`summary` (counts), `source_findings`, `blocked_findings`, `review_findings`, +`recommendations`, `non_deterministic_zone`. + +## Determinism + +`deterministic_report_id = sha256` over the manifest hash plus the sorted, +normalized per-source verdicts and findings. Identical `--manifest` and +`--eval-date` always produce the same id, independent of clock, host, or run +order. The `evaluated_at` date and the raw `age_days` are reported (under +`non_deterministic_zone` and per source, respectively) but are excluded from the +id. A different `--eval-date` that flips a freshness class is a genuinely +different result and yields a different id by design. + +## Anti-mirage scope + +- The check reports only what it computes from the manifest. It does not + synthesize a "pass" for sources it cannot verify. +- A source with no clear origin (no url, no date, thin metadata) is flagged or + blocked, never silently accepted. +- No web crawling: freshness uses declared dates, not live fetches, so the + result is deterministic and testable. + +## Known limits + +- Triage only; **no legal advice, no compliance determination.** +- License matching is identifier/alias based, not full SPDX-expression parsing + (`MIT OR Apache-2.0` is treated as unknown → review). +- Freshness uses declared `published_at`; it does not detect that a live source + silently changed. The `hash` + `local_path` check covers only local files. +- Age budgets are heuristics for internal review, not a policy guarantee. + +## Tests + +`tests/test_supply_chain_sources.py` with fixtures under +`tests/fixtures/supply_chain_sources/`.
Run: + +```bash +python -m pytest tests/test_supply_chain_sources.py -q +``` diff --git a/.internal-skills/supply-chain/source-check/example_source_manifest.json b/.internal-skills/supply-chain/source-check/example_source_manifest.json new file mode 100644 index 0000000..53ad742 --- /dev/null +++ b/.internal-skills/supply-chain/source-check/example_source_manifest.json @@ -0,0 +1,15 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-001", + "title": "Open specification (reference)", + "url": "https://example.org/spec", + "retrieved_at": "2026-06-02", + "published_at": "2026-01-01", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "default" + } + ] +} diff --git a/scripts/check_supply_chain_sources.py b/scripts/check_supply_chain_sources.py new file mode 100644 index 0000000..1d6c54e --- /dev/null +++ b/scripts/check_supply_chain_sources.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +"""x.klickd supply-chain — source freshness + license compatibility check. + +Stage 11 (license check) + stage 12 (source freshness) of the documented +supply-chain pipeline. Operates on a source manifest (JSON) describing the +inputs that feed a skill / candidate build. It classifies each source's +license and freshness for INTERNAL REVIEW and produces a deterministic JSON +report. It is a triage tool, NOT legal advice. + +What it does: + - parse + validate a source manifest; + - check required fields per source; + - normalize and classify known licenses: allowed / review / blocked / unknown; + - classify freshness: fresh / review / stale / missing_date (age budget + depends on source category); + - flag missing or non-https URLs (unless explicitly justified); + - verify a referenced local file's sha256 hash when present; + - emit a deterministic report (sorted, clock-independent id); + - exit non-zero when a source is blocked, a license is blocked, a critical + date is absent, or metadata falls below the required threshold. 
+ +The check makes no legal-compliance claim and does not assert that any source +IS compatible — only that it falls into a review bucket. Determinism: the +deterministic_report_id is a sha256 over the manifest hash plus the sorted, +normalized findings; it does not depend on wall-clock, host, or run order. +Any clock-dependent value (the evaluation date used for age math) is recorded +in non_deterministic_zone and excluded from the id. + +Exit codes: + 0 no blocking findings + 1 one or more blocking findings (blocked license/source, missing critical + date, or insufficient metadata) + 2 usage / I/O error (manifest missing, unparseable, bad schema) + +Stdlib-only, offline. No release artefact, no schema change, no network I/O. +""" +from __future__ import annotations + +import argparse +import datetime as _dt +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +SCHEMA_VERSION_MANIFEST = "xklickd.source_manifest.v0.1" +SCHEMA_VERSION_REPORT = "xklickd.source_check_report.v0.1" + +REPO_ROOT = Path(__file__).resolve().parents[1] + +# --- License policy (triage buckets, NOT legal advice) ---------------------- +# Keys are normalized SPDX-ish identifiers (upper-cased, stripped). +ALLOWED_LICENSES = { + "MIT", + "APACHE-2.0", + "BSD-2-CLAUSE", + "BSD-3-CLAUSE", + "CC0-1.0", + "CC-BY-4.0", +} +REVIEW_LICENSES = { + "CC-BY-SA-4.0", + "MPL-2.0", + "GPL-2.0", + "GPL-3.0", + "AGPL-3.0", + "CUSTOM", + "UNKNOWN", +} +BLOCKED_LICENSES = { + "PROPRIETARY-NO-PERMISSION", + "NO-REDISTRIBUTION", + "ALL-RIGHTS-RESERVED", + "NON-COMMERCIAL-ONLY", +} + +# Common spelling variants -> canonical key. 
+LICENSE_ALIASES = { + "APACHE2": "APACHE-2.0", + "APACHE 2.0": "APACHE-2.0", + "APACHE-2": "APACHE-2.0", + "BSD2": "BSD-2-CLAUSE", + "BSD-2": "BSD-2-CLAUSE", + "BSD3": "BSD-3-CLAUSE", + "BSD-3": "BSD-3-CLAUSE", + "CC0": "CC0-1.0", + "CC-BY": "CC-BY-4.0", + "CCBY4.0": "CC-BY-4.0", + "CC-BY-SA": "CC-BY-SA-4.0", + "GPLV2": "GPL-2.0", + "GPLV3": "GPL-3.0", + "AGPLV3": "AGPL-3.0", + "ARR": "ALL-RIGHTS-RESERVED", + "NC": "NON-COMMERCIAL-ONLY", + "NONCOMMERCIAL": "NON-COMMERCIAL-ONLY", + "PROPRIETARY": "PROPRIETARY-NO-PERMISSION", +} + +# --- Freshness policy (days) ------------------------------------------------- +# Age budget depends on the declared source category. Parameterizable here. +FRESHNESS_BUDGET_DAYS = { + "default": 365, + "security": 90, + "regulatory": 90, + "academic": 1095, + "theory": 1095, +} +# When a source exceeds its category budget but is still within +# budget * STALE_MULTIPLIER it is "review"; beyond that it is "stale". +STALE_MULTIPLIER = 2 # stale threshold = budget * multiplier + +REQUIRED_FIELDS = ("id", "title", "license", "usage") +# Fields whose absence is a freshness/provenance concern (handled specially). +DATE_FIELD = "published_at" +RETRIEVED_FIELD = "retrieved_at" + +# Usages that imply commercial / premium reuse (non-commercial license blocks).
+COMMERCIAL_USAGES = {"commercial", "premium", "premium_reuse", "redistribution"} + + +class ManifestError(Exception): + """Raised on a structurally invalid manifest (exit 2).""" + + +# --- helpers ----------------------------------------------------------------- +def _sha256_text(text: str) -> str: + return "sha256:" + hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def _sha256_file(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as fh: + for chunk in iter(lambda: fh.read(65536), b""): + h.update(chunk) + return "sha256:" + h.hexdigest() + + +def normalize_license(raw: Any) -> str: + if raw is None: + return "UNKNOWN" + key = str(raw).strip().upper() + if not key: + return "UNKNOWN" + key = LICENSE_ALIASES.get(key, key) + return key + + +def classify_license(normalized: str) -> str: + if normalized in ALLOWED_LICENSES: + return "allowed" + if normalized in BLOCKED_LICENSES: + return "blocked" + if normalized in REVIEW_LICENSES: + return "review" + return "unknown" + + +def _parse_date(value: Any) -> _dt.date | None: + if not value: + return None + try: + return _dt.date.fromisoformat(str(value)[:10]) + except ValueError: + return None + + +def _budget_for_category(category: str | None) -> int: + if not category: + return FRESHNESS_BUDGET_DAYS["default"] + return FRESHNESS_BUDGET_DAYS.get(str(category).strip().lower(), + FRESHNESS_BUDGET_DAYS["default"]) + + +def classify_freshness( + published: _dt.date | None, + eval_date: _dt.date, + category: str | None, + superseded: bool, +) -> tuple[str, int | None]: + """Return (class, age_days). class in fresh/review/stale/missing_date.""" + if published is None: + return "missing_date", None + age = (eval_date - published).days + if age < 0: + # Future-dated source: treat as review (suspicious metadata). + return "review", age + budget = _budget_for_category(category) + if superseded: + # Superseded academic/theory loses its long budget. 
+ budget = FRESHNESS_BUDGET_DAYS["default"] + if age <= budget: + return "fresh", age + if age <= budget * STALE_MULTIPLIER: + return "review", age + return "stale", age + + +# --- core evaluation --------------------------------------------------------- +def evaluate_source( + source: dict[str, Any], + eval_date: _dt.date, + manifest_dir: Path, + min_metadata_fields: int, +) -> dict[str, Any]: + """Evaluate one source. Pure given (source, eval_date, manifest_dir).""" + sid = str(source.get("id") or "") + findings: list[str] = [] + blocking: list[str] = [] + + # Required fields. + missing_required = [f for f in REQUIRED_FIELDS if not source.get(f)] + if missing_required: + msg = f"missing required field(s): {', '.join(sorted(missing_required))}" + findings.append(msg) + blocking.append(msg) + + # License. + license_norm = normalize_license(source.get("license")) + license_class = classify_license(license_norm) + usage = str(source.get("usage") or "").strip().lower() + if license_class == "blocked": + msg = f"license blocked: {license_norm}" + findings.append(msg) + blocking.append(msg) + elif license_norm == "NON-COMMERCIAL-ONLY": # defensive; already blocked set + msg = f"non-commercial license for usage '{usage}'" + findings.append(msg) + blocking.append(msg) + elif license_class in ("review", "unknown"): + findings.append(f"license needs review ({license_class}): {license_norm}") + + # Non-commercial reuse cross-check (covers aliases that resolve to NC). + if usage in COMMERCIAL_USAGES and license_norm == "NON-COMMERCIAL-ONLY": + if "non-commercial license" not in " ".join(blocking): + msg = f"non-commercial source for commercial/premium usage '{usage}'" + findings.append(msg) + blocking.append(msg) + + # URL. 
+ url = source.get("url") + url_justified = bool(source.get("url_exempt")) + if not url: + if not url_justified: + msg = "missing url (no url_exempt justification)" + findings.append(msg) + blocking.append(msg) + else: + findings.append("url absent but explicitly exempt") + elif not str(url).lower().startswith("https://"): + if str(url).lower().startswith("http://") and not url_justified: + msg = "non-https url (no url_exempt justification)" + findings.append(msg) + blocking.append(msg) + elif not url_justified: + findings.append(f"non-http(s) url scheme: {url}") + + # Freshness. + published = _parse_date(source.get(DATE_FIELD)) + category = source.get("category") + superseded = bool(source.get("superseded")) + freshness_class, age_days = classify_freshness( + published, eval_date, category, superseded + ) + if freshness_class == "missing_date": + # Critical for security/regulatory; review otherwise. + cat_norm = str(category or "").strip().lower() + if cat_norm in ("security", "regulatory"): + msg = "missing published_at for security/regulatory source (critical)" + findings.append(msg) + blocking.append(msg) + else: + findings.append("missing published_at date (review)") + elif freshness_class == "stale": + cat_norm = str(category or "").strip().lower() + if cat_norm in ("security", "regulatory"): + msg = f"stale security/regulatory source ({age_days} days old)" + findings.append(msg) + blocking.append(msg) + else: + findings.append(f"stale source ({age_days} days old) — review") + elif freshness_class == "review": + if age_days is not None and age_days < 0: + findings.append("published_at is in the future — review") + else: + findings.append(f"source past freshness budget ({age_days} days) — review") + + # Hash verification for a referenced local file. 
+ local_path = source.get("local_path") + hash_status = "not_applicable" + if local_path: + candidate = (manifest_dir / str(local_path)).resolve() + declared = source.get("hash") + if not candidate.exists(): + hash_status = "file_missing" + msg = f"local_path not found: {local_path}" + findings.append(msg) + blocking.append(msg) + elif not declared: + hash_status = "declared_hash_missing" + findings.append(f"local_path present but no declared hash: {local_path}") + else: + actual = _sha256_file(candidate) + if str(declared).strip().lower() == actual.lower(): + hash_status = "match" + else: + hash_status = "mismatch" + msg = f"hash mismatch for {local_path}" + findings.append(msg) + blocking.append(msg) + + # Metadata sufficiency threshold. + present_meta = sum( + 1 for f in ("title", "url", "published_at", "retrieved_at", "license", + "usage", "hash") + if source.get(f) + ) + if present_meta < min_metadata_fields: + msg = (f"insufficient metadata: {present_meta} of " + f"{min_metadata_fields} required descriptive fields present") + findings.append(msg) + blocking.append(msg) + + # Overall verdict. + if blocking: + verdict = "blocked" + elif (license_class in ("review", "unknown") + or freshness_class in ("review", "stale", "missing_date")): + verdict = "review" + else: + verdict = "allowed" + + return { + "id": sid, + "verdict": verdict, + "license_raw": source.get("license"), + "license_normalized": license_norm, + "license_class": license_class, + "usage": usage or None, + "category": (str(category).strip().lower() if category else None), + "freshness_class": freshness_class, + "age_days": age_days, + "hash_status": hash_status, + "findings": sorted(findings), + "blocking_findings": sorted(blocking), + } + + +def _deterministic_report_id(manifest_hash: str, findings: list[dict[str, Any]]) -> str: + """sha256 over manifest hash + sorted normalized per-source findings. 
+ + Clock-independent: age_days and any eval-date value are excluded here so + that two runs with the same manifest produce the same id regardless of when + they run. (age_days IS reported per source, but is not part of the id.) + """ + normalized = [] + for f in sorted(findings, key=lambda x: x["id"]): + normalized.append({ + "id": f["id"], + "verdict": f["verdict"], + "license_normalized": f["license_normalized"], + "license_class": f["license_class"], + "freshness_class": f["freshness_class"], + "hash_status": f["hash_status"], + "findings": f["findings"], + "blocking_findings": f["blocking_findings"], + }) + payload = json.dumps( + {"manifest_hash": manifest_hash, "sources": normalized}, + sort_keys=True, separators=(",", ":"), + ) + return _sha256_text(payload) + + +def build_report( + manifest: dict[str, Any], + manifest_text: str, + manifest_path: Path, + eval_date: _dt.date, + min_metadata_fields: int, +) -> dict[str, Any]: + sources = manifest.get("sources") + if not isinstance(sources, list): + raise ManifestError("manifest 'sources' must be a list") + + manifest_hash = _sha256_text(manifest_text) + manifest_dir = manifest_path.resolve().parent + + seen_ids: set[str] = set() + findings: list[dict[str, Any]] = [] + for idx, src in enumerate(sources): + if not isinstance(src, dict): + raise ManifestError(f"source at index {idx} is not an object") + result = evaluate_source(src, eval_date, manifest_dir, min_metadata_fields) + if result["id"] in seen_ids: + result["findings"] = sorted(result["findings"] + ["duplicate source id"]) + result["blocking_findings"] = sorted( + result["blocking_findings"] + ["duplicate source id"] + ) + result["verdict"] = "blocked" + seen_ids.add(result["id"]) + findings.append(result) + + findings.sort(key=lambda x: x["id"]) + + blocked = [f for f in findings if f["verdict"] == "blocked"] + review = [f for f in findings if f["verdict"] == "review"] + allowed = [f for f in findings if f["verdict"] == "allowed"] + + recommendations: 
list[str] = [] + if blocked: + recommendations.append( + "Resolve or remove blocked sources before candidate generation.") + if review: + recommendations.append( + "Route review/unknown-license and past-budget sources to internal " + "human/agent review; this tool does not give legal advice.") + if not blocked and not review: + recommendations.append("No blocking or review findings in this manifest.") + + report = { + "schema_version": SCHEMA_VERSION_REPORT, + "manifest_path": str(manifest_path), + "manifest_hash": manifest_hash, + "deterministic_report_id": _deterministic_report_id(manifest_hash, findings), + "summary": { + "total_sources": len(findings), + "allowed": len(allowed), + "review": len(review), + "blocked": len(blocked), + }, + "source_findings": findings, + "blocked_findings": [ + {"id": f["id"], "blocking_findings": f["blocking_findings"]} + for f in blocked + ], + "review_findings": [ + {"id": f["id"], "findings": f["findings"]} for f in review + ], + "recommendations": recommendations, + "non_deterministic_zone": { + "evaluated_at": eval_date.isoformat(), + "note": ("evaluated_at and per-source age_days depend on the run " + "date and are excluded from deterministic_report_id."), + }, + } + return report + + +def load_manifest(path: Path) -> tuple[dict[str, Any], str]: + if not path.exists(): + raise ManifestError(f"manifest not found: {path}") + text = path.read_text(encoding="utf-8") + try: + data = json.loads(text) + except json.JSONDecodeError as exc: + raise ManifestError(f"manifest is not valid JSON: {exc}") from exc + if not isinstance(data, dict): + raise ManifestError("manifest root must be a JSON object") + sv = data.get("schema_version") + if sv != SCHEMA_VERSION_MANIFEST: + raise ManifestError( + f"unexpected schema_version: {sv!r} (expected {SCHEMA_VERSION_MANIFEST!r})" + ) + return data, text + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="x.klickd supply-chain source freshness + 
license check " + "(internal triage, not legal advice)." + ) + parser.add_argument("--manifest", required=True, help="path to source manifest JSON") + parser.add_argument("--out", help="write deterministic JSON report to this path") + parser.add_argument("--quiet", action="store_true", + help="do not print the report to stdout") + parser.add_argument( + "--eval-date", + help="ISO date used for age math (default: today UTC). Set for " + "reproducible freshness classification in tests/CI.", + ) + parser.add_argument( + "--min-metadata-fields", type=int, default=3, + help="minimum descriptive fields a source must carry (default 3).", + ) + args = parser.parse_args(argv) + + try: + manifest, text = load_manifest(Path(args.manifest)) + except ManifestError as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + + if args.eval_date: + eval_date = _parse_date(args.eval_date) + if eval_date is None: + print(f"error: invalid --eval-date: {args.eval_date}", file=sys.stderr) + return 2 + else: + eval_date = _dt.datetime.now(_dt.timezone.utc).date() + + try: + report = build_report( + manifest, text, Path(args.manifest), eval_date, args.min_metadata_fields + ) + except ManifestError as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + + serialized = json.dumps(report, indent=2, sort_keys=True) + "\n" + if args.out: + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(serialized, encoding="utf-8") + if not args.quiet: + sys.stdout.write(serialized) + + return 1 if report["summary"]["blocked"] > 0 else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/fixtures/supply_chain_sources/data/local_source.txt b/tests/fixtures/supply_chain_sources/data/local_source.txt new file mode 100644 index 0000000..0654eec --- /dev/null +++ b/tests/fixtures/supply_chain_sources/data/local_source.txt @@ -0,0 +1,2 @@ +x.klickd source freshness + license check — local hash fixture. 
+This file's sha256 is referenced by manifest_hash_match.json. diff --git a/tests/fixtures/supply_chain_sources/manifest_academic_superseded.json b/tests/fixtures/supply_chain_sources/manifest_academic_superseded.json new file mode 100644 index 0000000..09af9ae --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_academic_superseded.json @@ -0,0 +1,26 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-academic-old-ok", + "title": "Foundational academic paper, still current", + "url": "https://example.org/paper", + "retrieved_at": "2026-06-02", + "published_at": "2024-06-01", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "academic" + }, + { + "id": "source-academic-superseded", + "title": "Older academic paper marked superseded", + "url": "https://example.org/paper-old", + "retrieved_at": "2026-06-02", + "published_at": "2025-01-01", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "academic", + "superseded": true + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_hash_match.json b/tests/fixtures/supply_chain_sources/manifest_hash_match.json new file mode 100644 index 0000000..8503d3e --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_hash_match.json @@ -0,0 +1,16 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-hash-ok", + "title": "Local source with verified hash", + "url": "https://example.org/local", + "retrieved_at": "2026-06-02", + "published_at": "2026-04-01", + "license": "MIT", + "usage": "reference", + "local_path": "data/local_source.txt", + "hash": "sha256:b79b18b57eb7e3e6806294eb07e6a06116c324bdd95d35ba212c4ff1a3e3ef66" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_hash_mismatch.json b/tests/fixtures/supply_chain_sources/manifest_hash_mismatch.json new file mode 100644 index 0000000..90404bb --- /dev/null +++ 
b/tests/fixtures/supply_chain_sources/manifest_hash_mismatch.json @@ -0,0 +1,16 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-hash-bad", + "title": "Local source with wrong declared hash", + "url": "https://example.org/local", + "retrieved_at": "2026-06-02", + "published_at": "2026-04-01", + "license": "MIT", + "usage": "reference", + "local_path": "data/local_source.txt", + "hash": "sha256:0000000000000000000000000000000000000000000000000000000000000000" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_license_blocked.json b/tests/fixtures/supply_chain_sources/manifest_license_blocked.json new file mode 100644 index 0000000..29b0d3c --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_license_blocked.json @@ -0,0 +1,14 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-blocked-lic", + "title": "Proprietary source, no permission", + "url": "https://example.org/proprietary", + "retrieved_at": "2026-06-02", + "published_at": "2026-02-01", + "license": "all-rights-reserved", + "usage": "reference" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_license_unknown.json b/tests/fixtures/supply_chain_sources/manifest_license_unknown.json new file mode 100644 index 0000000..e4f4e9c --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_license_unknown.json @@ -0,0 +1,14 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-unknown-lic", + "title": "Source with unrecognized license", + "url": "https://example.org/thing", + "retrieved_at": "2026-06-02", + "published_at": "2026-02-01", + "license": "SomeNovelLicense-9.9", + "usage": "reference" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_missing_fields.json b/tests/fixtures/supply_chain_sources/manifest_missing_fields.json new file mode 100644 index 0000000..3593001 --- /dev/null +++ 
b/tests/fixtures/supply_chain_sources/manifest_missing_fields.json @@ -0,0 +1,12 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-no-url", + "title": "Source missing url and date", + "retrieved_at": "2026-06-02", + "license": "MIT", + "usage": "reference" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_missing_security_date.json b/tests/fixtures/supply_chain_sources/manifest_missing_security_date.json new file mode 100644 index 0000000..65804d1 --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_missing_security_date.json @@ -0,0 +1,14 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-sec-no-date", + "title": "Security guidance with no publication date", + "url": "https://example.org/sec-guidance", + "retrieved_at": "2026-06-02", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "security" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_noncommercial_premium.json b/tests/fixtures/supply_chain_sources/manifest_noncommercial_premium.json new file mode 100644 index 0000000..f394a2c --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_noncommercial_premium.json @@ -0,0 +1,14 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-nc-premium", + "title": "Non-commercial dataset used for premium reuse", + "url": "https://example.org/nc-data", + "retrieved_at": "2026-06-02", + "published_at": "2026-03-01", + "license": "non-commercial-only", + "usage": "premium" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_ok.json b/tests/fixtures/supply_chain_sources/manifest_ok.json new file mode 100644 index 0000000..52d41a5 --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_ok.json @@ -0,0 +1,23 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-001", + "title": "Example open specification", + "url": 
"https://example.org/spec", + "retrieved_at": "2026-06-02", + "published_at": "2026-01-01", + "license": "CC-BY-4.0", + "usage": "reference" + }, + { + "id": "source-002", + "title": "MIT reference library", + "url": "https://example.org/lib", + "retrieved_at": "2026-06-02", + "published_at": "2025-12-01", + "license": "MIT", + "usage": "reference" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_stale_reference.json b/tests/fixtures/supply_chain_sources/manifest_stale_reference.json new file mode 100644 index 0000000..4549b3e --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_stale_reference.json @@ -0,0 +1,15 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-stale-ref", + "title": "Old reference document", + "url": "https://example.org/old", + "retrieved_at": "2026-06-02", + "published_at": "2022-01-01", + "license": "MIT", + "usage": "reference", + "category": "default" + } + ] +} diff --git a/tests/fixtures/supply_chain_sources/manifest_stale_security.json b/tests/fixtures/supply_chain_sources/manifest_stale_security.json new file mode 100644 index 0000000..41bf831 --- /dev/null +++ b/tests/fixtures/supply_chain_sources/manifest_stale_security.json @@ -0,0 +1,15 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "source-stale-security", + "title": "Outdated regulatory guidance", + "url": "https://example.org/regulatory-old", + "retrieved_at": "2026-06-02", + "published_at": "2024-01-01", + "license": "CC-BY-4.0", + "usage": "reference", + "category": "security" + } + ] +} diff --git a/tests/test_supply_chain_sources.py b/tests/test_supply_chain_sources.py new file mode 100644 index 0000000..daeeda1 --- /dev/null +++ b/tests/test_supply_chain_sources.py @@ -0,0 +1,242 @@ +"""Tests for scripts/check_supply_chain_sources.py. + +Source freshness + license compatibility triage. NON-NORMATIVE; no legal +advice, no schema change. 
A fixed --eval-date (2026-06-02) is used so freshness +classification is reproducible regardless of when the suite runs. +""" +from __future__ import annotations + +import datetime as _dt +import importlib.util +import json +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "scripts" / "check_supply_chain_sources.py" +FIX = REPO_ROOT / "tests" / "fixtures" / "supply_chain_sources" +EVAL_DATE = _dt.date(2026, 6, 2) + + +def _load(): + spec = importlib.util.spec_from_file_location("check_supply_chain_sources", SCRIPT) + assert spec and spec.loader, f"could not load {SCRIPT}" + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def _report(mod, name: str): + path = FIX / name + manifest, text = mod.load_manifest(path) + return mod.build_report(manifest, text, path, EVAL_DATE, 3) + + +def _by_id(report, sid): + return next(f for f in report["source_findings"] if f["id"] == sid) + + +# --- structural -------------------------------------------------------------- +def test_script_exists(): + assert SCRIPT.exists() + + +def test_ok_manifest_all_allowed(): + mod = _load() + rep = _report(mod, "manifest_ok.json") + assert rep["summary"]["blocked"] == 0 + assert rep["summary"]["review"] == 0 + assert rep["summary"]["allowed"] == 2 + assert all(f["verdict"] == "allowed" for f in rep["source_findings"]) + + +def test_report_has_required_fields(): + mod = _load() + rep = _report(mod, "manifest_ok.json") + for field in ( + "schema_version", "manifest_path", "manifest_hash", + "deterministic_report_id", "summary", "source_findings", + "blocked_findings", "review_findings", "recommendations", + "non_deterministic_zone", + ): + assert field in rep, f"missing report field: {field}" + + +# --- license ----------------------------------------------------------------- +def test_unknown_license_is_review(): + mod = _load() + rep = _report(mod, "manifest_license_unknown.json") + f = _by_id(rep, 
"source-unknown-lic") + assert f["license_class"] == "unknown" + assert f["verdict"] == "review" + assert rep["summary"]["blocked"] == 0 + + +def test_blocked_license_blocks(): + mod = _load() + rep = _report(mod, "manifest_license_blocked.json") + f = _by_id(rep, "source-blocked-lic") + assert f["license_class"] == "blocked" + assert f["verdict"] == "blocked" + assert rep["summary"]["blocked"] == 1 + + +def test_license_normalization_aliases(): + mod = _load() + assert mod.normalize_license("apache2") == "APACHE-2.0" + assert mod.classify_license(mod.normalize_license("apache2")) == "allowed" + assert mod.normalize_license("ARR") == "ALL-RIGHTS-RESERVED" + assert mod.classify_license(mod.normalize_license("ARR")) == "blocked" + + +# --- missing fields ---------------------------------------------------------- +def test_missing_url_and_date_flagged(): + mod = _load() + rep = _report(mod, "manifest_missing_fields.json") + f = _by_id(rep, "source-no-url") + joined = " ".join(f["findings"]) + assert "missing url" in joined + assert "published_at" in joined + assert f["verdict"] == "blocked" # missing url is blocking + + +# --- freshness --------------------------------------------------------------- +def test_stale_reference_is_review_not_blocked(): + mod = _load() + rep = _report(mod, "manifest_stale_reference.json") + f = _by_id(rep, "source-stale-ref") + assert f["freshness_class"] == "stale" + assert f["verdict"] == "review" + assert rep["summary"]["blocked"] == 0 + + +def test_stale_security_source_blocks(): + mod = _load() + rep = _report(mod, "manifest_stale_security.json") + f = _by_id(rep, "source-stale-security") + assert f["freshness_class"] == "stale" + assert f["verdict"] == "blocked" + + +def test_missing_security_date_blocks(): + mod = _load() + rep = _report(mod, "manifest_missing_security_date.json") + f = _by_id(rep, "source-sec-no-date") + assert f["freshness_class"] == "missing_date" + assert f["verdict"] == "blocked" + + +def 
test_academic_long_budget_and_superseded_review(): + mod = _load() + rep = _report(mod, "manifest_academic_superseded.json") + ok = _by_id(rep, "source-academic-old-ok") + sup = _by_id(rep, "source-academic-superseded") + assert ok["freshness_class"] == "fresh" + assert sup["freshness_class"] == "review" + + +# --- non-commercial / premium ------------------------------------------------ +def test_noncommercial_for_premium_blocks(): + mod = _load() + rep = _report(mod, "manifest_noncommercial_premium.json") + f = _by_id(rep, "source-nc-premium") + assert f["verdict"] == "blocked" + assert any("non-commercial" in b for b in f["blocking_findings"]) + + +# --- hash -------------------------------------------------------------------- +def test_hash_match_ok(): + mod = _load() + rep = _report(mod, "manifest_hash_match.json") + f = _by_id(rep, "source-hash-ok") + assert f["hash_status"] == "match" + assert f["verdict"] == "allowed" + + +def test_hash_mismatch_blocks(): + mod = _load() + rep = _report(mod, "manifest_hash_mismatch.json") + f = _by_id(rep, "source-hash-bad") + assert f["hash_status"] == "mismatch" + assert f["verdict"] == "blocked" + + +# --- determinism ------------------------------------------------------------- +def test_deterministic_report_id_stable_across_runs(): + mod = _load() + r1 = _report(mod, "manifest_ok.json") + r2 = _report(mod, "manifest_ok.json") + assert r1["deterministic_report_id"] == r2["deterministic_report_id"] + + +def test_report_id_excludes_clock_marker_and_age(): + """Same inputs (manifest + eval-date) -> same id, regardless of when run. + + The id excludes the wall-clock timestamp and raw age_days, but DOES include + derived freshness/license classification (those are meaningful outputs). So + two runs on the same eval-date must match even though evaluated_at is a clock + value living in non_deterministic_zone. 
+ """ + mod = _load() + path = FIX / "manifest_ok.json" + manifest, text = mod.load_manifest(path) + a = mod.build_report(manifest, text, path, _dt.date(2026, 6, 2), 3) + b = mod.build_report(manifest, text, path, _dt.date(2026, 6, 2), 3) + assert a["deterministic_report_id"] == b["deterministic_report_id"] + assert "evaluated_at" in a["non_deterministic_zone"] + + +def test_changed_freshness_class_changes_id(): + """A different eval-date that flips a freshness class is a different + semantic result and SHOULD yield a different id (not silently identical).""" + mod = _load() + path = FIX / "manifest_stale_reference.json" + manifest, text = mod.load_manifest(path) + a = mod.build_report(manifest, text, path, _dt.date(2023, 1, 1), 3) # fresh + b = mod.build_report(manifest, text, path, _dt.date(2026, 6, 2), 3) # stale + assert a["source_findings"][0]["freshness_class"] != \ + b["source_findings"][0]["freshness_class"] + assert a["deterministic_report_id"] != b["deterministic_report_id"] + + +def test_distinct_manifests_distinct_ids(): + mod = _load() + a = _report(mod, "manifest_ok.json") + b = _report(mod, "manifest_license_blocked.json") + assert a["deterministic_report_id"] != b["deterministic_report_id"] + + +# --- CLI exit codes ---------------------------------------------------------- +def test_cli_exit_zero_on_ok(tmp_path): + mod = _load() + out = tmp_path / "report.json" + rc = mod.main([ + "--manifest", str(FIX / "manifest_ok.json"), + "--out", str(out), "--quiet", "--eval-date", "2026-06-02", + ]) + assert rc == 0 + data = json.loads(out.read_text()) + assert data["summary"]["blocked"] == 0 + + +def test_cli_exit_one_on_blocked(tmp_path): + mod = _load() + out = tmp_path / "report.json" + rc = mod.main([ + "--manifest", str(FIX / "manifest_license_blocked.json"), + "--out", str(out), "--quiet", "--eval-date", "2026-06-02", + ]) + assert rc == 1 + + +def test_cli_exit_two_on_missing_manifest(tmp_path): + mod = _load() + rc = mod.main(["--manifest", 
str(tmp_path / "nope.json"), "--quiet"]) + assert rc == 2 + + +def test_cli_exit_two_on_bad_schema(tmp_path): + mod = _load() + bad = tmp_path / "bad.json" + bad.write_text(json.dumps({"schema_version": "wrong", "sources": []})) + rc = mod.main(["--manifest", str(bad), "--quiet"]) + assert rc == 2 From 167f0356df2c0367cf552cea93df22484e998567 Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 11:44:09 +0000 Subject: [PATCH 5/8] docs(internal): add v4.2 internal target skill mapping (draft, not public) Internal documentation only, ahead of internal production. Adds docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md + README capturing the validated v4.2 mapping corrections: - governance_system detailed and symmetric with memory_system - supply_chain renamed conceptually to skill_lifecycle (no completeness claim) - output_contract wired to context_graph via graph_bindings - harmonised competency/domain naming (primary/secondary_domain_competencies, domain_risk_profile, domain_output_requirements, competency_core) - explicit interactions layer + canonical end-to-end flow Non-normative, no release. Public .klickd stays v4.0.0 GA; the 42 x.klickd artefacts remain v4.1 candidates. No tag/DOI/package/schema/SDK change, no public v4.2 claim, no artefact modified. Internal track name kept out of public surfaces. 
Co-Authored-By: Claude Opus 4.7 --- docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md | 276 +++++++++++++++++++ docs/internal/README.md | 16 ++ 2 files changed, 292 insertions(+) create mode 100644 docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md create mode 100644 docs/internal/README.md diff --git a/docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md b/docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md new file mode 100644 index 0000000..cb89528 --- /dev/null +++ b/docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md @@ -0,0 +1,276 @@ +# `xklickd_internal_skill_v4_2` — internal target mapping (DRAFT, NOT a public release) + +| | | +|---|---| +| **Status** | **Internal · DRAFT · TARGET · NON-NORMATIVE** | +| **Track** | `xklickd_internal_skill_v4_2` — internal target mapping, ahead of internal production | +| **Created** | 2026-06-02 | +| **Scope** | Internal documentation only. This is the **target** internal mapping for v4.2; it is **not** a public release artefact. | +| **Relates to** | [`docs/chimera/README_V4_1.md`](../chimera/README_V4_1.md) · [`docs/chimera/V4_1_SKILL_CANDIDATE_MAPPING.md`](../chimera/V4_1_SKILL_CANDIDATE_MAPPING.md) · [`docs/chimera/V4_1_COMPETENCY_IDENTIFICATION_PROTOCOL.md`](../chimera/V4_1_COMPETENCY_IDENTIFICATION_PROTOCOL.md) · [`docs/rfcs/RFC-010-pack-memory-compression.md`](../rfcs/RFC-010-pack-memory-compression.md) | + +> **This document is non-normative and triggers no release.** It is **internal documentation only**. It does **not** modify `SPEC.md`, any v4.0 GA schema, any SDK, any vector, any lock file, or the status of any RFC. It introduces **no new normative field**. It does **not** publish anything: no tag, no `latest` on npm/PyPI, no DOI on Zenodo, no IANA action, no GitHub Release, no SDK bump, no `/klickdskill` catalog change. 
+> +> **The public `.klickd` track remains v4.0.0 GA, with the 42 `x.klickd` artefacts under `examples/v4.1/x-klickd-skills/{lite,pro}/` framed as v4.1 candidates.** Nothing here promotes any public artefact to v4.2 and nothing here claims a public v4.2 release. v4.2 is **in preparation**; see [`docs/public/X_KLICKD_SITE_COPY.md`](../public/X_KLICKD_SITE_COPY.md) §7 ("v4.2 in preparation", no date promised). +> +> **The internal track name (`xklickd_internal_skill_v4_2`) MUST NOT leak into any public-facing surface.** Public wording remains `x.klickd`. This is a continuation of the v4.1 QA gate that forbids internal-name / host-side leakage (QA-G13 in [`docs/chimera/V4_1_SKILL_QA_PROTOCOL.md`](../chimera/V4_1_SKILL_QA_PROTOCOL.md)). +> +> **Supply-chain completeness is NOT claimed.** The `skill_lifecycle` section below describes the *target* lifecycle layout; it is not an assertion that every lifecycle stage is implemented, populated, or verified. + +--- + +## 0. What this document is and is not + +`docs/internal/` holds **internal** target mappings that precede internal production. This document records the v4.2 *target* internal skill mapping so reviewers can argue with **structure, naming, governance symmetry, and layer interactions** before any internal build lands. + +This document **is**: + +- an internal **target** for the v4.2 skill mapping shape; +- a record of the validated corrections that motivated v4.2 (governance symmetry, lifecycle rename, graph bindings, naming harmonisation, explicit interactions); +- a planning surface, comparable to `docs/chimera/` for v4.1 but scoped to the internal track. + +This document **is not**: + +- a public release or release announcement; +- a normative spec, schema, or SDK contract; +- a claim that any public `x.klickd` artefact is v4.2; +- a claim that the skill supply chain / lifecycle is complete. + +--- + +## 1. Target top-level structure + +The v4.2 internal skill mapping targets the following top-level layout. 
Each node is a documentation layer in the internal mapping, not a shipped artefact. + +```text +xklickd_internal_skill_v4_2 +├── metadata +├── competency_architecture +├── memory_system +├── governance_system +├── memory_governance +├── runtime +├── context_graph +├── interactions +├── evidence +├── security +├── audit +├── skill_lifecycle +└── output_contract +``` + +Two changes from earlier internal drafts are deliberate and validated: + +1. **`supply_chain` is renamed conceptually to `skill_lifecycle`.** The earlier `supply_chain` label over-claimed a complete, audited provenance chain. `skill_lifecycle` names what the layer actually documents: the build → validate → promote → deprecate → release lifecycle of a skill, without asserting end-to-end supply-chain completeness. See §6. +2. **`interactions` is added as a first-class top-level layer** so the way layers communicate is documented explicitly rather than implied. See §5. + +--- + +## 2. `governance_system` — detailed and symmetric with `memory_system` + +`governance_system` is brought up to the same level of internal detail as `memory_system`, so the two control planes (what the agent remembers vs. what the agent is allowed to do) are documented symmetrically. + +```text +governance_system +├── authority_hierarchy +├── human_veto +├── consent_rules +├── risk_levels +├── action_gates +├── non_lowerable_rules +├── escalation_rules +├── approval_lifecycle +├── revocation_rules +├── policy_conflict_resolution +└── governance_audit +``` + +| Sub-layer | Internal purpose | +|---|---| +| `authority_hierarchy` | Who/what can authorise an action class, and the precedence order between authorities. | +| `human_veto` | The point where a human can block an action before it executes; never lowerable by an agent. | +| `consent_rules` | What the user must have consented to for a given action class to be permitted. | +| `risk_levels` | The risk taxonomy that gates actions; higher risk requires stronger gates. 
| +| `action_gates` | The concrete preconditions an action must clear before execution. | +| `non_lowerable_rules` | Rules an agent can never relax, even under instruction or memory pressure. | +| `escalation_rules` | When and how a decision is escalated (e.g. to human veto). | +| `approval_lifecycle` | The states an approval moves through (requested → granted/denied → expired). | +| `revocation_rules` | How a previously granted approval is revoked and what that invalidates. | +| `policy_conflict_resolution` | Deterministic resolution when two policies disagree. | +| `governance_audit` | The audit surface specific to governance decisions, feeding the shared `audit` layer. | + +### 2.1 Symmetry with `memory_system` and `memory_governance` + +`memory_governance` remains the bridge layer: it is where `memory_system` (retrieval, write candidates, retention) meets `governance_system` (consent, risk, veto). The symmetry intent is: every memory write that could influence an action is subject to governance, and every governance decision that consults memory is auditable. `memory_governance` is intentionally kept separate from both parents so the bridge contract is reviewable on its own. + +--- + +## 3. 
Naming harmonisation (competency / domain) + +The following names are harmonised in the v4.2 internal mapping for consistency with the v4.1 competency vocabulary in [`docs/chimera/V4_1_COMPETENCY_IDENTIFICATION_PROTOCOL.md`](../chimera/V4_1_COMPETENCY_IDENTIFICATION_PROTOCOL.md): + +| Section | Was | Now (v4.2 internal target) | +|---|---|---| +| top-level | `competency_architecture` | `competency_architecture` (unchanged — the umbrella section) | +| foundation | — | `competency_core` (the shared foundation competency) | +| primary domain | `domain_primary` | `primary_domain_competencies` | +| secondary domain | `domain_secondary` | `secondary_domain_competencies` | +| risk | `domain_risk_model` | `domain_risk_profile` | +| output | `domain_output_contract` | `domain_output_requirements` | + +`competency_architecture` targets the following internal shape: + +```text +competency_architecture +├── competency_core +├── primary_domain_competencies +├── secondary_domain_competencies +├── domain_risk_profile +└── domain_output_requirements +``` + +Rationale: the previous `domain_*` names mixed scope (which domain) with kind (what the field is). The harmonised names end with the kind (`*_domain_competencies`, `domain_risk_profile`, `domain_output_requirements`) so they read consistently and so `domain_output_requirements` is clearly distinct from the top-level `output_contract` (§7). + +--- + +## 4. `context_graph` + +`context_graph` is the layer the runtime traverses to resolve what is in scope for a task: competencies, memory entries, evidence, policy nodes, and action nodes, plus the edges between them. v4.2 makes the graph the explicit binding target for `output_contract` (§7) so that an output is tied to the graph nodes that justify it. + +--- + +## 5. `interactions` — how the layers communicate + +`interactions` is a new top-level layer that documents the directed flows between layers. 
It exists so the communication contract is explicit and reviewable, instead of being implied by prose scattered across other sections. + +```text +interactions +├── task_to_competency_flow +├── competency_to_memory_flow +├── memory_to_context_graph_flow +├── context_graph_to_policy_flow +├── policy_to_output_contract_flow +├── output_to_audit_flow +└── lifecycle_to_runtime_flow +``` + +| Flow | From → To | What moves | +|---|---|---| +| `task_to_competency_flow` | task intake → `competency_architecture` | Detected intent activates the relevant competencies. | +| `competency_to_memory_flow` | `competency_architecture` → `memory_system` | Active competencies scope which memory is retrieved. | +| `memory_to_context_graph_flow` | `memory_system` → `context_graph` | Retrieved memory is placed as nodes/edges for traversal. | +| `context_graph_to_policy_flow` | `context_graph` → `governance_system` | Graph traversal surfaces the policy nodes that gate the task. | +| `policy_to_output_contract_flow` | `governance_system` → `output_contract` | Policy evaluation constrains what outputs are allowed/forbidden. | +| `output_to_audit_flow` | `output_contract` → `audit` | Every checked output emits an audit event. | +| `lifecycle_to_runtime_flow` | `skill_lifecycle` → `runtime` | Only promoted skills are loadable by the runtime; lifecycle state gates availability. | + +### 5.1 Canonical flow + +The canonical end-to-end flow for a single task is: + +```text +user_task +→ intent_detection +→ competency_activation +→ memory_retrieval +→ context_graph_traversal +→ evidence_resolution +→ policy_evaluation +→ output_contract_check +→ human_veto_if_required +→ response_or_action +→ audit_event +→ memory_update_candidate +``` + +Notes: + +- `human_veto_if_required` is gated by `governance_system.human_veto` and `governance_system.risk_levels`; it is **not** lowerable by an agent. 
+- `memory_update_candidate` is a *candidate*, not a committed write — it is subject to `memory_governance` before it can influence future tasks. +- `evidence_resolution` reads from the `evidence` layer; an output that requires citations (see `output_contract.required_citations`) cannot pass `output_contract_check` without resolved evidence. + +--- + +## 6. `skill_lifecycle` (formerly `supply_chain`) + +Renamed conceptually from `supply_chain` to `skill_lifecycle`. The layer documents the lifecycle of a skill from build request to release/deprecation. **It does not claim that the supply chain is complete or that every stage is implemented or verified.** + +```text +skill_lifecycle +├── build_request +├── source_manifest +├── generated_candidate +├── validation_pipeline +├── audit_trail_index +├── determinism_record +├── logical_diff_report +├── source_license_report +├── threat_model_report +├── benchmark_report +├── premium_pass_report +├── promotion_gate +├── rollback_protocol +├── deprecation_protocol +└── release_record +``` + +| Stage | Internal purpose | +|---|---| +| `build_request` | The request that initiates a skill build. | +| `source_manifest` | The declared sources/competency anchors the build draws on. | +| `generated_candidate` | The produced candidate skill, pre-validation. | +| `validation_pipeline` | The ordered checks a candidate must pass. | +| `audit_trail_index` | Index into audit events produced during the build. | +| `determinism_record` | Evidence that the build is reproducible. | +| `logical_diff_report` | What changed logically versus the prior version. | +| `source_license_report` | Licence status of the declared sources. | +| `threat_model_report` | Threat-model review for the candidate. | +| `benchmark_report` | Benchmark results for the candidate. | +| `premium_pass_report` | Premium/extended review pass results. | +| `promotion_gate` | The gate a candidate must clear to be promoted. 
| +| `rollback_protocol` | How a promoted skill is rolled back. | +| `deprecation_protocol` | How a skill is retired. | +| `release_record` | Internal record of an internal promotion. **Not** a public release; no tag/DOI/package action. | + +> `release_record` here is an **internal** lifecycle record only. It does not correspond to any public tag, npm/PyPI `latest`, Zenodo DOI, or GitHub Release, and must never be presented as one. + +--- + +## 7. `output_contract` with `graph_bindings` + +`output_contract` is extended with a `graph_bindings` sub-layer so that the contract is wired to `context_graph` (§4): every contracted output declares which graph nodes/edges it creates or requires. + +```text +output_contract +├── allowed_outputs +├── forbidden_outputs +├── required_citations +├── required_uncertainty_markers +├── required_handoff_summary +├── required_audit_event +└── graph_bindings + ├── creates_action_node + ├── requires_policy_node + ├── requires_evidence_node + ├── may_trigger_veto_edge + └── writes_audit_edge +``` + +| `graph_bindings` field | Meaning | +|---|---| +| `creates_action_node` | The output produces an action node in `context_graph`. | +| `requires_policy_node` | A policy node must exist and be satisfied (links to `governance_system`). | +| `requires_evidence_node` | An evidence node must be resolved (links to `evidence`). | +| `may_trigger_veto_edge` | The output may create an edge to `governance_system.human_veto`. | +| `writes_audit_edge` | The output writes an edge into the `audit` layer. | + +This closes the loop with the canonical flow (§5.1): `output_contract_check` is the step that verifies these bindings before `human_veto_if_required` and `response_or_action`. + +--- + +## 8. Out of scope / explicit non-claims + +- **No public release.** Public `.klickd` is v4.0.0 GA; the 42 `x.klickd` artefacts are v4.1 candidates. This document does not change that and does not announce a public v4.2. 
+- **No artefact changes.** No file under `examples/v4.1/x-klickd-skills/` is modified by this document. No version, DOI, release note, or public README is touched. +- **No supply-chain completeness claim.** `skill_lifecycle` is a target layout, not an assertion that the lifecycle is fully built or verified. +- **No internal-name leakage.** `xklickd_internal_skill_v4_2` is an internal track name and must not appear on public surfaces. +- **Repository boundary.** This document is scoped to `Davincc77/klickdskill` only. It does not reference or modify any other repository. diff --git a/docs/internal/README.md b/docs/internal/README.md new file mode 100644 index 0000000..c134634 --- /dev/null +++ b/docs/internal/README.md @@ -0,0 +1,16 @@ +# `docs/internal/` — internal target mappings (DRAFT, not public) + +| | | +|---|---| +| **Status** | **Internal · DRAFT · NON-NORMATIVE** | +| **Created** | 2026-06-02 | + +> This directory holds **internal** target mappings that precede internal production. Nothing here is a public release, a normative spec, a schema, or an SDK contract. Nothing here promotes any public `x.klickd` artefact to a new version, and nothing here announces a public release. +> +> The public `.klickd` track remains **v4.0.0 GA**, with the 42 `x.klickd` artefacts framed as **v4.1 candidates**. v4.2 is **in preparation** (see [`docs/public/X_KLICKD_SITE_COPY.md`](../public/X_KLICKD_SITE_COPY.md) §7); no public v4.2 release is claimed here. +> +> Internal track names (e.g. `xklickd_internal_skill_v4_2`) **must not** leak into public-facing surfaces. Public wording remains `x.klickd`. + +## Contents + +- [`INTERNAL_SKILL_V4_2_MAPPING.md`](./INTERNAL_SKILL_V4_2_MAPPING.md) — the v4.2 internal target skill mapping (governance symmetry, `skill_lifecycle` rename, `output_contract.graph_bindings`, harmonised competency/domain naming, explicit `interactions` layer + canonical flow). 
From ed41ebbec0f14b41db9f572ec72eaae2caa87d71 Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 11:49:04 +0000 Subject: [PATCH 6/8] feat: deterministic supply-chain threat-model generator (v0.1) Add scripts/generate_supply_chain_threat_model.py: an offline, stdlib-only, deterministic static analyser for x.klickd candidate manifests. Classifies declared threats across 11 categories (authority_escalation, human_veto_bypass, tool_boundary_violation, memory_poisoning, private_public_leak, evidence_weakening, unsourced_claim, unsafe_external_action, irreversible_action, compliance_overclaim, stale_or_unlicensed_source_dependency), emits required mitigations, and blocks (exit 1) on unmitigated high/critical findings. Adds 7 candidate fixtures, a 14-case pytest suite, and an internal/draft doc. NON-NORMATIVE: not a security certification, no GDPR/EU AI Act compliance claim, no benchmark/universal-standard claim, no release. Public artefacts remain v4.1. Co-Authored-By: Claude Opus 4.7 --- docs/supply-chain/THREAT_MODEL_GENERATOR.md | 85 +++ scripts/generate_supply_chain_threat_model.py | 556 ++++++++++++++++++ .../candidate_compliance_overclaim.json | 29 + ...andidate_evidence_false_public_claims.json | 29 + .../candidate_external_action_no_gate.json | 28 + ...andidate_longterm_memory_no_promotion.json | 28 + .../threat-model/candidate_low_risk_ok.json | 31 + .../candidate_no_veto_sensitive_action.json | 28 + .../candidate_private_public_leak.json | 31 + tests/test_supply_chain_threat_model.py | 152 +++++ 10 files changed, 997 insertions(+) create mode 100644 docs/supply-chain/THREAT_MODEL_GENERATOR.md create mode 100644 scripts/generate_supply_chain_threat_model.py create mode 100644 tests/fixtures/threat-model/candidate_compliance_overclaim.json create mode 100644 tests/fixtures/threat-model/candidate_evidence_false_public_claims.json create mode 100644 tests/fixtures/threat-model/candidate_external_action_no_gate.json create mode 100644 
tests/fixtures/threat-model/candidate_longterm_memory_no_promotion.json create mode 100644 tests/fixtures/threat-model/candidate_low_risk_ok.json create mode 100644 tests/fixtures/threat-model/candidate_no_veto_sensitive_action.json create mode 100644 tests/fixtures/threat-model/candidate_private_public_leak.json create mode 100644 tests/test_supply_chain_threat_model.py diff --git a/docs/supply-chain/THREAT_MODEL_GENERATOR.md b/docs/supply-chain/THREAT_MODEL_GENERATOR.md new file mode 100644 index 0000000..f7c3629 --- /dev/null +++ b/docs/supply-chain/THREAT_MODEL_GENERATOR.md @@ -0,0 +1,85 @@ +# Supply-chain threat-model generator (v0.1, internal / draft) + +**Status:** internal draft, NON-NORMATIVE. Not a release, not a public claim, +not a v4.2 GA artefact. The 42 public x.klickd artefacts remain v4.1 candidates. + +This document describes `scripts/generate_supply_chain_threat_model.py`, a +deterministic, offline, stdlib-only static analyser for x.klickd +skill/candidate manifests. + +## What it does + +Given a candidate manifest JSON, the generator: + +1. parses the manifest (`xklickd.candidate.v0.1`, or a minimal subset of its + keys); +2. computes a deterministic `candidate_hash` (sha256 over canonical JSON + bytes); +3. classifies declared threats by category; +4. emits `required_mitigations`; +5. produces a deterministic report JSON; +6. **blocks** (exit code 1) when any unmitigated `high` or `critical` + finding is present. + +Same input bytes → identical report bytes. + +## Usage + +```bash +python scripts/generate_supply_chain_threat_model.py \ + --candidate tests/fixtures/threat-model/candidate_low_risk_ok.json \ + --out .internal-skills/supply-chain/threat-model/report.json +``` + +- `--candidate PATH` (required): candidate manifest JSON. +- `--out PATH` (optional): write the report; otherwise print to stdout. +- `--no-block` (optional): report findings but always exit 0. 
+ +Exit codes: `0` no blocking finding · `1` blocked (unmitigated high/critical) · +`2` usage / I/O / parse error. + +## Threat categories + +`authority_escalation`, `human_veto_bypass`, `tool_boundary_violation`, +`memory_poisoning`, `private_public_leak`, `evidence_weakening`, +`unsourced_claim`, `unsafe_external_action`, `irreversible_action`, +`compliance_overclaim`, `stale_or_unlicensed_source_dependency`. + +Severities: `low` / `medium` / `high` / `critical`. + +## Report shape (minimum fields) + +`schema_version`, `candidate_path`, `candidate_hash`, +`deterministic_threat_model_id`, `summary` (counts), `threats`, +`required_mitigations`, `blocked_findings`, `recommendations`, +`non_deterministic_zone`, `claim_boundaries`. + +## Blocking examples (see `tests/fixtures/threat-model/`) + +| Fixture | Category | Result | +| --- | --- | --- | +| `candidate_low_risk_ok` | — | pass (exit 0) | +| `candidate_no_veto_sensitive_action` | `human_veto_bypass` | block | +| `candidate_external_action_no_gate` | `unsafe_external_action` | block | +| `candidate_longterm_memory_no_promotion` | `memory_poisoning` | block | +| `candidate_private_public_leak` | `private_public_leak` | block | +| `candidate_evidence_false_public_claims` | `evidence_weakening` + `unsourced_claim` | block | +| `candidate_compliance_overclaim` | `compliance_overclaim` | block | + +## Claim boundaries (do NOT widen) + +This tool is **not** a security certification. It does **not** establish GDPR +or EU AI Act compliance, makes **no** benchmark-superiority or universal-standard +claim, does **not** prove the candidate is a loaded/executable skill, and is +**not** full automation. Findings reflect only what the manifest *declares* +about itself. Human review remains required. 
+ +## Limits / what is tool-backed vs planned + +- **Tool-backed:** manifest parsing, deterministic hashing, the rule-based + classification above, deterministic report rendering, the block decision, + and the fixture-driven test suite. +- **Planned / out of scope:** runtime/behavioural analysis of a loaded + candidate, network scanning, legal/compliance assessment, integration into a + promotion gate, and richer source freshness/license resolution (the related + source-freshness work lives in a separate PR and is not depended on here). diff --git a/scripts/generate_supply_chain_threat_model.py b/scripts/generate_supply_chain_threat_model.py new file mode 100644 index 0000000..3e60d41 --- /dev/null +++ b/scripts/generate_supply_chain_threat_model.py @@ -0,0 +1,556 @@ +#!/usr/bin/env python3 +"""Deterministic supply-chain threat-model generator for x.klickd +skill/candidate manifests. + +Scope (NON-NORMATIVE, planning / governance tool): + - Parses a candidate manifest JSON (schema_version `xklickd.candidate.v0.1`, + or a minimal subset of those keys). + - Computes a deterministic candidate_hash over the canonical JSON bytes. + - Classifies threats per category against the candidate's declared + governance / tools / memory / output_contract / risk_profile. + - Emits required_mitigations and a deterministic report JSON. + - Blocks (exit 1) when any unmitigated `critical` or `high` finding is + present. + +What this tool IS: + - A deterministic, offline, stdlib-only static analyser of a *declared* + candidate manifest. Same input bytes -> identical report bytes. + +What this tool IS NOT (claim boundaries — do NOT widen): + - It is NOT a security certification. + - It does NOT establish GDPR or EU AI Act compliance. + - It does NOT execute, load, or network-scan the candidate. + - It does NOT prove the candidate is a loaded/executable skill. + - It makes no benchmark-superiority or universal-standard claim. 
+ The findings reflect only what the manifest declares about itself. + +Threat categories (brief §"classer menaces par catégorie"): + authority_escalation, human_veto_bypass, tool_boundary_violation, + memory_poisoning, private_public_leak, evidence_weakening, + unsourced_claim, unsafe_external_action, irreversible_action, + compliance_overclaim, stale_or_unlicensed_source_dependency + +Severities: low / medium / high / critical + +Exit codes: + 0 no blocking finding (no unmitigated high/critical) + 1 at least one unmitigated high/critical finding -> candidate BLOCKED + 2 usage / I/O / parse error + +CLI: + python scripts/generate_supply_chain_threat_model.py \ + --candidate path/to/candidate.json \ + --out .internal-skills/supply-chain/threat-model/report.json +""" +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = "xklickd.threat-model.v0.1" +CANDIDATE_SCHEMA_VERSION = "xklickd.candidate.v0.1" + +# Severity ordering for deterministic comparison / sorting. +SEVERITY_ORDER = {"low": 0, "medium": 1, "high": 2, "critical": 3} + +# Actions treated as sensitive even if the candidate does not flag them. +# These are the irreversible / externally-visible action classes the brief +# wants gated behind a human veto. +KNOWN_SENSITIVE_ACTIONS = ( + "external_communication", + "send_email", + "publish", + "deploy", + "file_write", + "file_delete", + "payment", + "network_request", + "code_execution", +) + +# Tool names that perform an external or irreversible action. Used to detect +# tool/action mismatches with the declared governance gates. 
+EXTERNAL_ACTION_TOOLS = ( + "send_email", + "publish", + "deploy", + "post_webhook", + "http_post", + "network_request", + "payment", +) +IRREVERSIBLE_ACTION_TOOLS = ( + "file_delete", + "deploy", + "publish", + "payment", + "drop_table", + "rm", +) + +# Substrings that, if present in any declared claim / overclaim field, are +# treated as a compliance / superiority overclaim. Mirrors the public claim +# boundaries we must preserve. +COMPLIANCE_OVERCLAIM_TERMS = ( + "gdpr compliant", + "gdpr compliance", + "automatic gdpr", + "eu ai act compliant", + "eu ai act compliance", + "ai act compliant", + "certified secure", + "security certified", + "security certification", + "universal standard", + "benchmark superiority", + "state of the art", + "best in class", + "fully automated compliance", +) + + +def _canonical_bytes(obj: Any) -> bytes: + """Canonical JSON encoding for hashing: sorted keys, no insignificant + whitespace, UTF-8. Deterministic for a given logical object.""" + return json.dumps( + obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False + ).encode("utf-8") + + +def candidate_hash(candidate: dict) -> str: + """sha256 over the canonical bytes of the candidate object.""" + return "sha256:" + hashlib.sha256(_canonical_bytes(candidate)).hexdigest() + + +def deterministic_threat_model_id(candidate: dict, threats: list[dict]) -> str: + """Stable id derived from candidate hash + the sorted finding ids. + + Two runs on the same input produce the same id; a change in any finding + changes the id. 
Independent of dict ordering or run time.""" + finding_ids = sorted(t["id"] for t in threats) + seed = { + "schema_version": SCHEMA_VERSION, + "candidate_hash": candidate_hash(candidate), + "finding_ids": finding_ids, + } + return "tmid:" + hashlib.sha256(_canonical_bytes(seed)).hexdigest()[:32] + + +def _g(candidate: dict, *path: str, default: Any = None) -> Any: + """Safe nested getter.""" + cur: Any = candidate + for key in path: + if not isinstance(cur, dict) or key not in cur: + return default + cur = cur[key] + return cur + + +def _finding( + category: str, + severity: str, + title: str, + detail: str, + mitigations: list[str], + *, + mitigated: bool = False, +) -> dict: + """Build a finding with a deterministic id (category + title hash).""" + fid = "thr:" + hashlib.sha256( + f"{category}|{title}".encode("utf-8") + ).hexdigest()[:16] + return { + "id": fid, + "category": category, + "severity": severity, + "title": title, + "detail": detail, + "required_mitigations": list(mitigations), + "mitigated": bool(mitigated), + } + + +def classify(candidate: dict) -> list[dict]: + """Return the deterministic list of threat findings for a candidate. + + Each rule is a pure function of declared manifest fields. The output + order is the rule order below; the caller re-sorts deterministically. 
+ """ + findings: list[dict] = [] + + gov = candidate.get("governance") or {} + mem = candidate.get("memory") or {} + tools = candidate.get("tools") or {} + risk = candidate.get("risk_profile") or {} + out = candidate.get("output_contract") or {} + + human_veto = bool(gov.get("human_veto_required")) + no_auto_external = bool(gov.get("no_auto_external_action")) + non_lowerable = list(gov.get("non_lowerable_rules") or []) + + sensitive_actions = list(risk.get("sensitive_actions") or []) + allowed_tools = list(tools.get("allowed") or []) + forbidden_tools = list(tools.get("forbidden") or []) + + declared_sensitive = set(sensitive_actions) + # Sensitive actions implied by the allowed tool set, even if not declared. + implied_sensitive = { + t for t in allowed_tools if t in KNOWN_SENSITIVE_ACTIONS + } + effective_sensitive = declared_sensitive | implied_sensitive + + # --- authority_escalation --- + if gov.get("requests_admin") or gov.get("self_grant_authority"): + findings.append(_finding( + "authority_escalation", "critical", + "Candidate requests elevated/admin authority", + "governance declares requests_admin/self_grant_authority, which " + "lets the candidate widen its own authority.", + ["Remove self-granted authority; authority must be assigned by a " + "human-owned governance layer, raise-only."], + )) + final_owner = str(gov.get("final_decision_owner", "")).lower() + if final_owner and not final_owner.startswith("human"): + findings.append(_finding( + "authority_escalation", "high", + "Final decision owner is not human", + f"governance.final_decision_owner='{final_owner}' is not a " + "human_* value.", + ["Set final_decision_owner to a human_* role."], + )) + + # --- human_veto_bypass --- + if effective_sensitive and not human_veto: + findings.append(_finding( + "human_veto_bypass", "critical", + "Sensitive actions present without human veto", + "candidate exposes sensitive actions " + f"{sorted(effective_sensitive)} but governance." 
+ "human_veto_required is not true.", + ["Set governance.human_veto_required=true and route every " + "sensitive action through a raise-only human veto gate."], + )) + if gov.get("can_override_veto") or gov.get("veto_optional"): + findings.append(_finding( + "human_veto_bypass", "critical", + "Candidate can override or skip the human veto", + "governance declares can_override_veto/veto_optional.", + ["The human veto must be non-overridable (raise-only); remove the " + "override flag."], + )) + + # --- tool_boundary_violation --- + overlap = sorted(set(allowed_tools) & set(forbidden_tools)) + if overlap: + findings.append(_finding( + "tool_boundary_violation", "high", + "Tool appears in both allowed and forbidden lists", + f"tools allowed and forbidden overlap on {overlap}.", + ["Resolve the contradiction; a forbidden tool must not be " + "allowed."], + )) + + # --- memory_poisoning --- + writes_long_term = bool(mem.get("writes_long_term")) + has_promotion_rules = bool( + mem.get("promotion_rules") or mem.get("write_gate") + ) + if writes_long_term and not has_promotion_rules: + findings.append(_finding( + "memory_poisoning", "high", + "Long-term memory write without promotion/write gate", + "memory.writes_long_term is true but no promotion_rules / " + "write_gate is declared; unreviewed writes can poison memory.", + ["Declare memory.promotion_rules (human/gate-reviewed) before any " + "long-term write is persisted."], + )) + + # --- private_public_leak --- + reads_private = bool(mem.get("reads_private_context")) + forbidden_outputs = list(out.get("forbidden_outputs") or []) + has_public_output = bool( + out.get("emits_public_output") + or "publish" in effective_sensitive + or "external_communication" in effective_sensitive + ) + if candidate.get("private_public_leak") or _g( + candidate, "boundaries", "private_to_public_leak" + ): + findings.append(_finding( + "private_public_leak", "critical", + "Declared private-to-public data leak", + "candidate flags a 
private/public boundary leak.", + ["Eliminate the leak; private context must not cross into public " + "output without an explicit redaction + human gate."], + )) + elif reads_private and has_public_output and ( + "private_to_public" not in forbidden_outputs + and "unsourced_public_claim" not in forbidden_outputs + ): + findings.append(_finding( + "private_public_leak", "high", + "Private context readable and public output emitted without " + "boundary guard", + "memory.reads_private_context is true and the candidate can emit " + "public output, but output_contract.forbidden_outputs does not " + "guard the private->public boundary.", + ["Add a private->public boundary guard to " + "output_contract.forbidden_outputs and gate public emission."], + )) + + # --- evidence_weakening --- + non_lowerable_lower = {str(r).lower() for r in non_lowerable} + if candidate.get("lowers_evidence") or gov.get("evidence_optional"): + findings.append(_finding( + "evidence_weakening", "high", + "Candidate weakens or makes evidence optional", + "candidate declares lowers_evidence / evidence_optional.", + ["Evidence requirements are non-lowerable; restore the evidence " + "gate."], + )) + + # --- unsourced_claim --- + requires_citations = bool(out.get("requires_citations")) + if has_public_output and not requires_citations and ( + "no_unsourced_claim" not in non_lowerable_lower + ): + findings.append(_finding( + "unsourced_claim", "high", + "Public output without required citations", + "candidate emits public output but " + "output_contract.requires_citations is not true and no " + "no_unsourced_claim non-lowerable rule is declared.", + ["Set output_contract.requires_citations=true or declare the " + "no_unsourced_claim non-lowerable rule."], + )) + + # --- unsafe_external_action --- + external_tools = sorted(set(allowed_tools) & set(EXTERNAL_ACTION_TOOLS)) + if external_tools and not no_auto_external: + findings.append(_finding( + "unsafe_external_action", "critical", + "External-action 
tool allowed without no-auto-external gate", + f"candidate allows external-action tool(s) {external_tools} but " + "governance.no_auto_external_action is not true.", + ["Set governance.no_auto_external_action=true; external actions " + "must require an explicit human-gated step."], + )) + + # --- irreversible_action --- + irreversible_tools = sorted( + set(allowed_tools) & set(IRREVERSIBLE_ACTION_TOOLS) + ) + if irreversible_tools and not human_veto: + findings.append(_finding( + "irreversible_action", "high", + "Irreversible-action tool allowed without human veto", + f"candidate allows irreversible tool(s) {irreversible_tools} but " + "human_veto_required is not true.", + ["Gate irreversible actions behind a raise-only human veto and a " + "rollback/confirmation step."], + )) + + # --- compliance_overclaim --- + claim_texts: list[str] = [] + for key in ("claims", "marketing_claims", "description"): + v = candidate.get(key) + if isinstance(v, str): + claim_texts.append(v) + elif isinstance(v, list): + claim_texts.extend(str(x) for x in v) + joined = " ".join(claim_texts).lower() + matched_terms = sorted({t for t in COMPLIANCE_OVERCLAIM_TERMS if t in joined}) + if matched_terms: + findings.append(_finding( + "compliance_overclaim", "high", + "Compliance / superiority overclaim in candidate text", + f"candidate claim text contains overclaim term(s): " + f"{matched_terms}.", + ["Remove the overclaim; this tool establishes neither legal " + "compliance nor benchmark superiority."], + )) + + # --- stale_or_unlicensed_source_dependency --- + sources = candidate.get("sources") or [] + if isinstance(sources, list): + for src in sources: + if not isinstance(src, dict): + continue + name = str(src.get("name") or src.get("id") or src.get("uri") or "?") + lic = src.get("license") + if lic in (None, "", "unknown", "unspecified"): + findings.append(_finding( + "stale_or_unlicensed_source_dependency", "medium", + f"Source '{name}' has no resolved license", + f"source '{name}' 
declares no usable license " + f"(license={lic!r}).", + ["Resolve and record an explicit, compatible license for " + "the source before use."], + )) + if src.get("stale") is True or src.get("freshness") == "stale": + findings.append(_finding( + "stale_or_unlicensed_source_dependency", "medium", + f"Source '{name}' is marked stale", + f"source '{name}' is flagged stale.", + ["Refresh the source or pin a current, dated revision."], + )) + + return findings + + +def build_report(candidate: dict, candidate_path: str) -> dict: + """Assemble the deterministic threat-model report.""" + threats = classify(candidate) + # Deterministic ordering: severity desc, then category, then id. + threats_sorted = sorted( + threats, + key=lambda t: ( + -SEVERITY_ORDER.get(t["severity"], 0), + t["category"], + t["id"], + ), + ) + + counts: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "critical": 0} + by_category: dict[str, int] = {} + for t in threats_sorted: + counts[t["severity"]] = counts.get(t["severity"], 0) + 1 + by_category[t["category"]] = by_category.get(t["category"], 0) + 1 + + blocked_findings = [ + t for t in threats_sorted + if t["severity"] in ("high", "critical") and not t["mitigated"] + ] + + required_mitigations = sorted({ + m for t in threats_sorted for m in t["required_mitigations"] + }) + + recommendations: list[str] = [] + if blocked_findings: + recommendations.append( + "Resolve all high/critical findings and re-run before promotion." + ) + else: + recommendations.append( + "No blocking finding from the declared manifest; promotion gate " + "may proceed to the next pipeline stage (human review still " + "required)." + ) + if counts["medium"]: + recommendations.append( + "Review medium findings (e.g. source license/freshness) before " + "promotion." 
+ ) + + return { + "schema_version": SCHEMA_VERSION, + "candidate_schema_version": candidate.get( + "schema_version", CANDIDATE_SCHEMA_VERSION + ), + "candidate_path": candidate_path, + "candidate_id": candidate.get("skill_id"), + "candidate_hash": candidate_hash(candidate), + "deterministic_threat_model_id": deterministic_threat_model_id( + candidate, threats_sorted + ), + "summary": { + "total": len(threats_sorted), + "by_severity": counts, + "by_category": dict(sorted(by_category.items())), + "blocked": len(blocked_findings), + }, + "threats": threats_sorted, + "required_mitigations": required_mitigations, + "blocked_findings": [t["id"] for t in blocked_findings], + "recommendations": recommendations, + "non_deterministic_zone": [ + "Human review judgement on each finding.", + "Runtime behaviour of the candidate once loaded (not executed " + "here).", + "External legal/compliance assessment (out of scope; this tool " + "makes no compliance claim).", + "Source content drift after the recorded candidate_hash.", + ], + "claim_boundaries": { + "is_security_certification": False, + "establishes_legal_compliance": False, + "is_full_automation": False, + "proves_loaded_executable_skill": False, + "note": "Findings reflect only what the manifest declares; this " + "tool is offline, stdlib-only, and non-normative.", + }, + } + + +def render(report: dict) -> str: + """Deterministic pretty JSON (sorted keys, trailing newline).""" + return json.dumps(report, indent=2, sort_keys=True, ensure_ascii=False) + "\n" + + +def main(argv: list[str]) -> int: + parser = argparse.ArgumentParser( + description="Deterministic supply-chain threat-model generator " + "for x.klickd candidate manifests (non-normative).", + ) + parser.add_argument( + "--candidate", required=True, + help="Path to candidate/skill manifest JSON.", + ) + parser.add_argument( + "--out", default=None, + help="Path to write the report JSON. 
If omitted, prints to stdout.", + ) + parser.add_argument( + "--no-block", action="store_true", + help="Report findings but always exit 0 (do not block on " + "high/critical). Default is to block.", + ) + args = parser.parse_args(argv[1:]) + + cand_path = Path(args.candidate) + if not cand_path.exists(): + print(f"ERROR: candidate not found: {cand_path}", file=sys.stderr) + return 2 + try: + candidate = json.loads(cand_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as e: + print(f"ERROR: candidate JSON parse failed: {e}", file=sys.stderr) + return 2 + if not isinstance(candidate, dict): + print("ERROR: candidate JSON must be an object", file=sys.stderr) + return 2 + + report = build_report(candidate, str(args.candidate)) + rendered = render(report) + + if args.out: + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(rendered, encoding="utf-8") + print(f"wrote {out_path}") + else: + sys.stdout.write(rendered) + + blocked = report["blocked_findings"] + if blocked and not args.no_block: + print( + f"BLOCKED: {len(blocked)} unmitigated high/critical finding(s): " + f"{blocked}", + file=sys.stderr, + ) + return 1 + print( + f"OK: {report['summary']['total']} finding(s); " + f"{len(blocked)} blocking.", + file=sys.stderr, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/tests/fixtures/threat-model/candidate_compliance_overclaim.json b/tests/fixtures/threat-model/candidate_compliance_overclaim.json new file mode 100644 index 0000000..d824b0f --- /dev/null +++ b/tests/fixtures/threat-model/candidate_compliance_overclaim.json @@ -0,0 +1,29 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-compliance-overclaim", + "domain": "legal", + "description": "This skill is GDPR compliant and EU AI Act compliant out of the box, a universal standard for benchmark superiority.", + "risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + 
"governance": { + "human_veto_required": true, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", + "non_lowerable_rules": ["no_unsourced_claim"] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file"], + "forbidden": [] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": false, + "forbidden_outputs": [] + } +} diff --git a/tests/fixtures/threat-model/candidate_evidence_false_public_claims.json b/tests/fixtures/threat-model/candidate_evidence_false_public_claims.json new file mode 100644 index 0000000..79b9208 --- /dev/null +++ b/tests/fixtures/threat-model/candidate_evidence_false_public_claims.json @@ -0,0 +1,29 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-evidence-false-public", + "domain": "analysis", + "risk_profile": { + "default_risk": "medium", + "sensitive_actions": [] + }, + "governance": { + "human_veto_required": true, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", + "evidence_optional": true, + "non_lowerable_rules": [] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file"], + "forbidden": [] + }, + "output_contract": { + "requires_citations": false, + "emits_public_output": true, + "forbidden_outputs": [] + } +} diff --git a/tests/fixtures/threat-model/candidate_external_action_no_gate.json b/tests/fixtures/threat-model/candidate_external_action_no_gate.json new file mode 100644 index 0000000..d2f90e8 --- /dev/null +++ b/tests/fixtures/threat-model/candidate_external_action_no_gate.json @@ -0,0 +1,28 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-external-no-gate", + "domain": "comms", + "risk_profile": { + "default_risk": "high", + "sensitive_actions": ["external_communication"] + }, + "governance": { + "human_veto_required": true, + "no_auto_external_action": 
false, + "final_decision_owner": "human_operator", + "non_lowerable_rules": ["no_unsourced_claim"] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file", "send_email"], + "forbidden": [] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": false, + "forbidden_outputs": [] + } +} diff --git a/tests/fixtures/threat-model/candidate_longterm_memory_no_promotion.json b/tests/fixtures/threat-model/candidate_longterm_memory_no_promotion.json new file mode 100644 index 0000000..230e75b --- /dev/null +++ b/tests/fixtures/threat-model/candidate_longterm_memory_no_promotion.json @@ -0,0 +1,28 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-longterm-memory", + "domain": "knowledge", + "risk_profile": { + "default_risk": "medium", + "sensitive_actions": [] + }, + "governance": { + "human_veto_required": true, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", + "non_lowerable_rules": ["no_unsourced_claim"] + }, + "memory": { + "writes_long_term": true, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file"], + "forbidden": ["publish"] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": false, + "forbidden_outputs": [] + } +} diff --git a/tests/fixtures/threat-model/candidate_low_risk_ok.json b/tests/fixtures/threat-model/candidate_low_risk_ok.json new file mode 100644 index 0000000..97b66b7 --- /dev/null +++ b/tests/fixtures/threat-model/candidate_low_risk_ok.json @@ -0,0 +1,31 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-low-risk-reader", + "domain": "research", + "risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + "governance": { + "human_veto_required": true, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", + "non_lowerable_rules": ["no_stub_as_loaded_skill", "no_unsourced_claim"] + }, + 
"memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file"], + "forbidden": ["send_email", "publish"] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": true, + "forbidden_outputs": ["unsourced_public_claim", "private_to_public"] + }, + "sources": [ + {"name": "skos-framework", "license": "CC-BY-4.0", "freshness": "current"} + ] +} diff --git a/tests/fixtures/threat-model/candidate_no_veto_sensitive_action.json b/tests/fixtures/threat-model/candidate_no_veto_sensitive_action.json new file mode 100644 index 0000000..1f7628e --- /dev/null +++ b/tests/fixtures/threat-model/candidate_no_veto_sensitive_action.json @@ -0,0 +1,28 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-no-veto-sensitive", + "domain": "ops", + "risk_profile": { + "default_risk": "high", + "sensitive_actions": ["file_write", "external_communication"] + }, + "governance": { + "human_veto_required": false, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", + "non_lowerable_rules": ["no_unsourced_claim"] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "tools": { + "allowed": ["read_file"], + "forbidden": [] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": false, + "forbidden_outputs": [] + } +} diff --git a/tests/fixtures/threat-model/candidate_private_public_leak.json b/tests/fixtures/threat-model/candidate_private_public_leak.json new file mode 100644 index 0000000..04b9bb3 --- /dev/null +++ b/tests/fixtures/threat-model/candidate_private_public_leak.json @@ -0,0 +1,31 @@ +{ + "schema_version": "xklickd.candidate.v0.1", + "skill_id": "xklickd-private-public-leak", + "domain": "data", + "risk_profile": { + "default_risk": "high", + "sensitive_actions": [] + }, + "governance": { + "human_veto_required": true, + "no_auto_external_action": true, + "final_decision_owner": "human_operator", 
+ "non_lowerable_rules": ["no_unsourced_claim"] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": true + }, + "tools": { + "allowed": ["read_file"], + "forbidden": [] + }, + "output_contract": { + "requires_citations": true, + "emits_public_output": true, + "forbidden_outputs": [] + }, + "boundaries": { + "private_to_public_leak": true + } +} diff --git a/tests/test_supply_chain_threat_model.py b/tests/test_supply_chain_threat_model.py new file mode 100644 index 0000000..89c4572 --- /dev/null +++ b/tests/test_supply_chain_threat_model.py @@ -0,0 +1,152 @@ +"""Tests for scripts/generate_supply_chain_threat_model.py. + +NON-NORMATIVE. Stdlib-only, offline. Exercises the deterministic +threat-model generator against the candidate fixtures under +tests/fixtures/threat-model/ and asserts the blocking behaviour and +determinism the brief requires. +""" +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "scripts" / "generate_supply_chain_threat_model.py" +FIXTURES = REPO_ROOT / "tests" / "fixtures" / "threat-model" + + +def _load_mod(): + spec = importlib.util.spec_from_file_location("sc_threat_model", SCRIPT) + assert spec and spec.loader, f"could not load {SCRIPT}" + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def _candidate(name: str) -> dict: + return json.loads((FIXTURES / name).read_text(encoding="utf-8")) + + +def _report(name: str) -> dict: + mod = _load_mod() + return mod.build_report(_candidate(name), name) + + +def test_script_exists(): + assert SCRIPT.exists() + + +def test_fixtures_exist(): + expected = { + "candidate_low_risk_ok.json", + "candidate_no_veto_sensitive_action.json", + "candidate_external_action_no_gate.json", + "candidate_longterm_memory_no_promotion.json", + "candidate_private_public_leak.json", + "candidate_evidence_false_public_claims.json", + 
"candidate_compliance_overclaim.json", + } + present = {p.name for p in FIXTURES.glob("*.json")} + assert expected <= present, expected - present + + +def test_low_risk_candidate_has_no_blocking_findings(): + rep = _report("candidate_low_risk_ok.json") + assert rep["blocked_findings"] == [] + assert rep["summary"]["by_severity"]["critical"] == 0 + assert rep["summary"]["by_severity"]["high"] == 0 + + +def test_no_veto_with_sensitive_action_blocks(): + rep = _report("candidate_no_veto_sensitive_action.json") + cats = {t["category"] for t in rep["threats"]} + assert "human_veto_bypass" in cats + assert rep["blocked_findings"] + + +def test_external_action_without_gate_blocks(): + rep = _report("candidate_external_action_no_gate.json") + cats = {t["category"] for t in rep["threats"]} + assert "unsafe_external_action" in cats + assert rep["blocked_findings"] + + +def test_longterm_memory_without_promotion_blocks_or_high(): + rep = _report("candidate_longterm_memory_no_promotion.json") + mem = [t for t in rep["threats"] if t["category"] == "memory_poisoning"] + assert mem, "expected a memory_poisoning finding" + assert mem[0]["severity"] in ("high", "critical") + assert rep["blocked_findings"] + + +def test_private_public_leak_blocks(): + rep = _report("candidate_private_public_leak.json") + cats = {t["category"] for t in rep["threats"]} + assert "private_public_leak" in cats + assert rep["blocked_findings"] + + +def test_evidence_false_with_public_claims_blocks(): + rep = _report("candidate_evidence_false_public_claims.json") + cats = {t["category"] for t in rep["threats"]} + assert "evidence_weakening" in cats + assert "unsourced_claim" in cats + assert rep["blocked_findings"] + + +def test_compliance_overclaim_blocks(): + rep = _report("candidate_compliance_overclaim.json") + cats = {t["category"] for t in rep["threats"]} + assert "compliance_overclaim" in cats + assert rep["blocked_findings"] + + +def test_output_is_deterministic(): + mod = _load_mod() + cand = 
_candidate("candidate_no_veto_sensitive_action.json") + r1 = mod.render(mod.build_report(cand, "x")) + r2 = mod.render(mod.build_report(cand, "x")) + assert r1 == r2 + + +def test_candidate_hash_is_order_independent(): + mod = _load_mod() + a = {"skill_id": "s", "tools": {"allowed": ["read_file"]}, "domain": "d"} + b = {"domain": "d", "tools": {"allowed": ["read_file"]}, "skill_id": "s"} + assert mod.candidate_hash(a) == mod.candidate_hash(b) + + +def test_threat_model_id_changes_with_findings(): + mod = _load_mod() + ok = _candidate("candidate_low_risk_ok.json") + bad = _candidate("candidate_no_veto_sensitive_action.json") + id_ok = mod.build_report(ok, "x")["deterministic_threat_model_id"] + id_bad = mod.build_report(bad, "x")["deterministic_threat_model_id"] + assert id_ok != id_bad + + +def test_report_has_required_fields(): + rep = _report("candidate_low_risk_ok.json") + for key in ( + "schema_version", + "candidate_path", + "candidate_hash", + "deterministic_threat_model_id", + "summary", + "threats", + "required_mitigations", + "blocked_findings", + "recommendations", + "non_deterministic_zone", + ): + assert key in rep, f"missing report field: {key}" + + +def test_claim_boundaries_preserved(): + rep = _report("candidate_low_risk_ok.json") + cb = rep["claim_boundaries"] + assert cb["is_security_certification"] is False + assert cb["establishes_legal_compliance"] is False + assert cb["is_full_automation"] is False + assert cb["proves_loaded_executable_skill"] is False From 12e3786a8a2255b234d96a78fde314f4dd95ac6e Mon Sep 17 00:00:00 2001 From: klickd integration agent Date: Tue, 2 Jun 2026 11:55:09 +0000 Subject: [PATCH 7/8] add internal supply-chain integration index + MASTER_BRIEF protocol doc --- .internal-skills/supply-chain/README.md | 52 +++++++++++++++++ MASTER_BRIEF.md | 76 +++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 .internal-skills/supply-chain/README.md create mode 100644 MASTER_BRIEF.md diff --git 
a/.internal-skills/supply-chain/README.md b/.internal-skills/supply-chain/README.md new file mode 100644 index 0000000..e122265 --- /dev/null +++ b/.internal-skills/supply-chain/README.md @@ -0,0 +1,52 @@ +# `.internal-skills/supply-chain/` — internal supply-chain stages (integration index) + +| | | +|---|---| +| **Status** | **Internal · NON-NORMATIVE · no release / no publish / no merge to main** | +| **Created** | 2026-06-02 | +| **Companion spec** | [`docs/rfcs/chimera/SUPPLY_CHAIN.md`](../../docs/rfcs/chimera/SUPPLY_CHAIN.md) (process spec) | +| **Rules of engagement** | [`MASTER_BRIEF.md`](../../MASTER_BRIEF.md) (anti-mirage rules, v4.1/v4.2 boundary) | + +> This directory holds **internal** supply-chain artefacts and the audit/diff/source-check records produced by the tool-backed stages. Nothing here is a public release, a normative spec, a schema, or an SDK contract. Read every stage label **literally** (`tool` / `planned`). A catalog entry or stub is **never** a loaded executable skill — see the loaded-skill gate in §"Loaded-skill gate". + +This README is the integration index that brings the supply-chain components together. It is the human-readable map of which stages are **real and tool-backed today** versus **planned**. 
+ +--- + +## Real, tool-backed stages (shipped + tested) + +| Stage | Tool | Internal artefacts | Tests | +|---|---|---|---| +| **Audit / determinism** | `scripts/generate_supply_chain_audit.py` | `audit/audit_trail_index.json`, `audit/determinism_record.json` | `tests/test_supply_chain_audit.py` | +| **Logical diff** | `scripts/generate_supply_chain_diff.py` | `diff/` (report output) | `tests/test_supply_chain_diff.py` (+ `tests/fixtures/supply_chain_diff/`) | +| **Source freshness + license** | `scripts/check_supply_chain_sources.py` | `source-check/example_source_manifest.json` | `tests/test_supply_chain_sources.py` (+ `tests/fixtures/supply_chain_sources/`) | +| **Threat model** | `scripts/generate_supply_chain_threat_model.py` | (report output) · doc: `docs/supply-chain/THREAT_MODEL_GENERATOR.md` | `tests/test_supply_chain_threat_model.py` (+ `tests/fixtures/threat-model/`) | + +Each of these is `tool`: a runnable script with a passing test module and deterministic output. "Tool-backed" means the bytes and behaviour exist and are tested — it does **not** imply the end-to-end build runner exists. + +## Planned stages (specified, not built) + +| Stage | What it will do | Why it is not claimed yet | +|---|---|---| +| **Candidate generation** | Produce a candidate `carrier_pack` from a config-only build request (the build *runner*). | No runner is shipped; the *process* is specified, the executor is not. | +| **Promotion gate** | Pass/fail enforcement that blocks a candidate from being promoted unless all checks pass. | No gate enforces promotion today; checks run, but nothing blocks on them. | +| **Full PII / secrets scanner** | Scan candidate inputs/outputs for PII and secrets beyond the engineering source/license checks. | Current `source-check` is an engineering license/freshness check, **not** a compliance attestation or a PII scanner. | +| **Runtime enforcement** | Enforce guardrails in-loop at execution time, not just at build/audit time. 
| Build-time checks exist; runtime enforcement does not. | + +--- + +## Loaded-skill gate + +A pack/skill is "loaded" or "used" **only** when: + +``` +artifact_loaded == true AND sha256_matches_manifest == true +``` + +per [`docs/integrations/skill-loader-protocol.md`](../../docs/integrations/skill-loader-protocol.md). Anything short of that — a manifest row, a stub, a routing placeholder — is **not** a loaded skill and must not be described as one. + +## Boundary reminder + +- Public artefacts remain **v4.1**. **No public v4.2 claim.** v4.2 is an internal target only (`docs/internal/`). +- No release, publish, tag, DOI, external communication, merge to main, or PR approval from supply-chain work. +- Do not touch `Davincc77/klickd-ai`. diff --git a/MASTER_BRIEF.md b/MASTER_BRIEF.md new file mode 100644 index 0000000..77f60b0 --- /dev/null +++ b/MASTER_BRIEF.md @@ -0,0 +1,76 @@ +# MASTER_BRIEF — x.klickd internal supply-chain protocol + +| | | +|---|---| +| **Status** | **Internal · NON-NORMATIVE · binding on agents working in this repo** | +| **Created** | 2026-06-02 | +| **Scope** | The internal x.klickd skill supply chain (`.internal-skills/`, `scripts/generate_supply_chain_*`, `scripts/check_supply_chain_*`, `docs/internal/`, `docs/supply-chain/`, `docs/rfcs/chimera/`) | + +> This file exists so that **future agents do not depend on any external or workspace-local brief that may be missing.** It restates the non-negotiable rules in-repo. If a delegated task references a brief you cannot find, this document is the authoritative fallback. Read it fully before acting. + +--- + +## 0. Why this document exists + +Work on the internal supply chain is delegated across many agents. Each delegation must carry complete context; an agent must never guess at intent or rely on a workspace path that may not be present. The companion process spec is [`docs/rfcs/chimera/SUPPLY_CHAIN.md`](docs/rfcs/chimera/SUPPLY_CHAIN.md) (non-normative, describes the pipeline). 
This brief captures the **rules of engagement** that sit above any individual task. + +--- + +## 1. Anti-mirage rules (non-negotiable) + +These rules exist to prevent the appearance of capability that does not exist ("mirage"). Apply them literally. + +1. **No loaded-skill claim without proof.** A pack/skill is "loaded" or "used" **only** when `artifact_loaded = true` **and** `sha256_matches_manifest = true`, per [`docs/integrations/skill-loader-protocol.md`](docs/integrations/skill-loader-protocol.md). A catalog entry, a stub, a routing placeholder, a manifest row, or a marketing page is **never** a loaded executable skill. Do not describe one as such. +2. **Read the per-stage label literally.** Each pipeline stage is labelled `tool` (shipped, runnable, tested), `manual` (a human/agent procedure), or `planned` (specified, not built). Never describe a `planned` or `manual` stage as automated or shipped. +3. **No claims beyond evidence.** No "universal standard", no "automatic GDPR / EU AI Act / sectoral compliance", no "benchmark superiority proven", no automation percentage stated as a measured result. The 70–80% automation figure is a **design target**, not a guarantee. +4. **Determinism is claimed only where recorded.** A stage is deterministic only if a determinism record (same `input_hash` → same `output_hash`) backs it. Otherwise say so. +5. **No external action.** No release, no publish (npm / PyPI), no `latest` tag, no git tag, no Zenodo DOI, no IANA action, no external communication (email, Slack, social), no merge to `main`, no PR approval. Integration work opens **draft** PRs only. +6. **Repo isolation.** Do **not** touch `Davincc77/klickd-ai`. This work lives only in `Davincc77/klickdskill`. + +--- + +## 2. Public v4.1 vs internal v4.2 boundary + +- The public `.klickd` track is **v4.0.0 GA**; the 42 `x.klickd` artefacts are framed as **v4.1 candidates**. These are the only public version claims. 
+- v4.2 exists **only as an internal target** (see [`docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md`](docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md)). **Do not claim a public v4.2 release.** "v4.2 in preparation" is the maximum public-facing statement, and it lives only where already written. +- Internal track codenames (e.g. `xklickd_internal_skill_v4_2`, sibling-path codenames) are **internal identifiers only** and MUST NOT propagate to public surfaces: `README.md`, `docs/public/*`, package metadata, or any published artefact. + +--- + +## 3. Current real (tool-backed) stages vs planned stages + +See [`.internal-skills/supply-chain/README.md`](.internal-skills/supply-chain/README.md) for the authoritative per-stage table. Summary: + +**Real, tool-backed today (shipped + tested):** + +- **Audit / determinism** — `scripts/generate_supply_chain_audit.py` (audit-trail index + determinism record). +- **Logical diff** — `scripts/generate_supply_chain_diff.py` (before/after candidate diff with violation classes). +- **Source freshness + license compatibility** — `scripts/check_supply_chain_sources.py`. +- **Threat model** — `scripts/generate_supply_chain_threat_model.py` (deterministic threat-model generator v0.1). + +**Planned (specified, not built):** + +- Candidate generation (the build *runner*). +- Promotion gate (pass/fail enforcement that blocks promotion). +- Full PII / secrets scanner (beyond the engineering source/license checks). +- Runtime enforcement (in-loop guardrail enforcement at execution time). + +--- + +## 4. 
Required validations before any integration PR + +Run and report exactly (including pre-existing baseline failures): + +- New supply-chain tests: `pytest tests/test_supply_chain_audit.py tests/test_supply_chain_diff.py tests/test_supply_chain_sources.py tests/test_supply_chain_threat_model.py` +- Pack verifier: `python scripts/verify_xklickd_skill_packs.py` +- Candidate mapping validator: `python scripts/validate_v4_1_candidate_mapping.py` +- v4 schema validator: `python scripts/validate_v4_schemas.py` +- Public codename / forbidden-claim greps over changed files (see §1, §2). + +Report baseline failures **as-is**; do not mask them. Known pre-existing baseline: nested `packages/`, `benchmarks/`, `examples/`, `integrations/` test modules fail collection under root `pytest` due to import-path issues unrelated to the supply chain. Scope to `tests/` for a clean signal. + +--- + +## 5. Next step after integration + +The next step is **not** public release. It is: build the **runner candidate generator + promotion gate** (the two highest-leverage `planned` stages), so that candidate packs can be generated and gated end-to-end before any promotion decision. From c075037aeb5666c7dffb683f7d9eb7bbfb50ef32 Mon Sep 17 00:00:00 2001 From: klickd-agent Date: Tue, 2 Jun 2026 12:17:02 +0000 Subject: [PATCH 8/8] feat(supply-chain): internal candidate generator + combined promotion gate (v0.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the next two tool-backed supply-chain stages on top of the cumulative integration branch, both NON-NORMATIVE and internal-only: - scripts/generate_supply_chain_candidate.py: config-only build_request -> candidate skill in the internal v4.2 target shape. Deterministic ids derived only from input bytes; missing domain info -> requires_human_premium_pass (never hallucinated); sources only from build_request/source_manifest. 
- scripts/run_supply_chain_promotion_gate.py: orchestrates threat-model, source/license, logical-diff, candidate-shape, and forbidden-claim / public-private boundary tripwires. Classifies ACCEPT / ACCEPT_WITH_REVIEW / BLOCK (exit 0/0/1, 2 usage). Reports — never runs — the premium pass; not_run checks recorded honestly. Tests: 39 new (deterministic repeatability, anti-mirage premium-pass, blocked candidate, clean accept, forbidden-claim/leak/over-claim blocks). Full suite 283 passed. Example candidate + gate report checked in. ACTION_LOG added; README integration index updated (stages moved planned -> tool-backed with literal scope notes). No release/tag/DOI/publish/deploy. No merge to main. Public stays v4.1. Co-Authored-By: Claude Opus 4.7 --- .internal-skills/supply-chain/ACTION_LOG.md | 81 +++ .internal-skills/supply-chain/README.md | 18 +- .../candidates/xklickd-research-reader.json | 315 ++++++++ .../xklickd-research-reader.gate.json | 117 +++ .../xklickd-research-reader.gate.md | 19 + scripts/generate_supply_chain_candidate.py | 676 ++++++++++++++++++ scripts/run_supply_chain_promotion_gate.py | 565 +++++++++++++++ .../build_request_clean.json | 50 ++ .../build_request_missing_domain.json | 10 + .../source_manifest_ok.json | 14 + tests/test_supply_chain_candidate.py | 229 ++++++ tests/test_supply_chain_promotion_gate.py | 267 +++++++ 12 files changed, 2356 insertions(+), 5 deletions(-) create mode 100644 .internal-skills/supply-chain/ACTION_LOG.md create mode 100644 .internal-skills/supply-chain/candidates/xklickd-research-reader.json create mode 100644 .internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.json create mode 100644 .internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.md create mode 100644 scripts/generate_supply_chain_candidate.py create mode 100644 scripts/run_supply_chain_promotion_gate.py create mode 100644 tests/fixtures/supply_chain_candidate/build_request_clean.json create mode 100644 
tests/fixtures/supply_chain_candidate/build_request_missing_domain.json create mode 100644 tests/fixtures/supply_chain_candidate/source_manifest_ok.json create mode 100644 tests/test_supply_chain_candidate.py create mode 100644 tests/test_supply_chain_promotion_gate.py diff --git a/.internal-skills/supply-chain/ACTION_LOG.md b/.internal-skills/supply-chain/ACTION_LOG.md new file mode 100644 index 0000000..328e9bf --- /dev/null +++ b/.internal-skills/supply-chain/ACTION_LOG.md @@ -0,0 +1,81 @@ +# `.internal-skills/supply-chain/ACTION_LOG.md` — append-only internal action log + +> Internal · NON-NORMATIVE. Append-only. Records actions, validations, and +> limitations for supply-chain runner/gate work. No external action is recorded +> here because none was taken (no release, tag, DOI, publish, deploy, or +> communication). The private repo `Davincc77/klickd-ai` was not touched. + +--- + +## 2026-06-02 — candidate generator + promotion gate (v0.1) + +- **Branch:** `feat/supply-chain-runner-gate`, stacked on + `integration/supply-chain-cumulative` (PR #121). +- **Base for PR:** `integration/supply-chain-cumulative` (NOT `main`). + +### Added +- `scripts/generate_supply_chain_candidate.py` — internal candidate generator + (runner). Config-only `build_request` JSON → candidate skill in the internal + v4.2 target shape under `.internal-skills/supply-chain/candidates/` or + `--out`. Deterministic: `candidate_id` / `candidate_hash` / `run_id` derived + only from canonical build_request bytes (+ resolved source manifest hash). + No `generated_at` in the hashed core. Sources come only from the + build_request / referenced source_manifest; missing domain info → + `requires_human_premium_pass`, never hallucinated. +- `scripts/run_supply_chain_promotion_gate.py` — combined promotion gate. 
+ Orchestrates threat model (always), source/license (when `--source-manifest`), + logical diff (when `--before`), candidate shape checks, and forbidden-claim / + public-private boundary tripwires. Classifies ACCEPT / ACCEPT_WITH_REVIEW / + BLOCK. Exit 0 acceptable, 1 BLOCK, 2 usage. `deterministic_gate_id` excludes + the clock (`eval_date`). Reports — does not run — premium pass. `not_run` + checks recorded with a reason, never as `pass`. +- `tests/test_supply_chain_candidate.py` (20 tests), + `tests/test_supply_chain_promotion_gate.py` (19 tests). +- `tests/fixtures/supply_chain_candidate/` — `build_request_clean.json`, + `build_request_missing_domain.json`, `source_manifest_ok.json`. +- Example artefacts: `candidates/xklickd-research-reader.json`, + `promotion-gate/xklickd-research-reader.gate.json` + `.gate.md`. +- Updated `README.md` integration index: moved Candidate generation + Promotion + gate from "planned" to tool-backed, with literal scope notes. + +### Commands run (local, offline, stdlib-only) +- `python scripts/generate_supply_chain_candidate.py --build-request REQ.json --out CAND.json` +- `python scripts/run_supply_chain_promotion_gate.py --candidate CAND.json [--source-manifest MANIFEST.json] [--before BEFORE.json] --out GATE.json --md GATE.md --eval-date 2026-06-02` +- `python -m pytest tests/test_supply_chain_*.py` → 102 passed. +- `python -m pytest tests/` → 283 passed, 1 unrelated DeprecationWarning + (jsonschema.__version__), 0 failures. +- `python scripts/verify_xklickd_skill_packs.py verify` → rc 0. +- `python scripts/validate_v4_schemas.py` → rc 0. +- `python scripts/validate_v4_1_candidate_mapping.py` → rc 0. +- Forbidden-claims / codename grep over committed `candidates/` and + `promotion-gate/` artefacts → CLEAN (no banned substring). Internal track name + `xklickd_internal_skill_v4_2` appears only inside the candidate's + `internal_target` block, as designed. 
+ +### Validations / behaviour confirmed +- Deterministic repeatability: identical build_request → identical + candidate_id/hash; identical candidate → identical gate_id, stable across + differing `--eval-date`. +- Missing domain info → `requires_human_premium_pass=true` with named gaps; + no competencies/sources hallucinated. +- Clean candidate → gate ACCEPT (exit 0). +- Missing-domain candidate → gate ACCEPT_WITH_REVIEW (exit 0), + premium_pass_required=true. +- Forbidden claim, internal codename, private→public leak, public v4.2 + over-claim, missing v4.2 layer, completeness claim → gate BLOCK (exit 1). + +### Limitations (no mirage) +- Emitting the v4.2 target shape is NOT a claim of supply-chain completeness; a + generated candidate is NOT a loaded executable skill (fails the loaded-skill + gate: requires artifact_loaded AND sha256_matches_manifest). +- The gate's boundary tripwire is a coarse guard, not a full PII/secrets + scanner (still a planned stage). Runtime enforcement remains planned. +- No legal/compliance, security-certification, or benchmark-superiority claim. +- Premium pass is reported as required where applicable but is NOT executed. + +### Explicitly NOT done +- No release, tag, DOI, npm/PyPI publish, GitHub Release, or deploy. +- No merge to `main`. +- No external communication. +- No change to `Davincc77/klickd-ai`. +- No public artefact promoted to v4.2 (public stays v4.1 candidates). 
diff --git a/.internal-skills/supply-chain/README.md b/.internal-skills/supply-chain/README.md index e122265..966d72f 100644 --- a/.internal-skills/supply-chain/README.md +++ b/.internal-skills/supply-chain/README.md @@ -21,17 +21,25 @@ This README is the integration index that brings the supply-chain components tog | **Logical diff** | `scripts/generate_supply_chain_diff.py` | `diff/` (report output) | `tests/test_supply_chain_diff.py` (+ `tests/fixtures/supply_chain_diff/`) | | **Source freshness + license** | `scripts/check_supply_chain_sources.py` | `source-check/example_source_manifest.json` | `tests/test_supply_chain_sources.py` (+ `tests/fixtures/supply_chain_sources/`) | | **Threat model** | `scripts/generate_supply_chain_threat_model.py` | (report output) · doc: `docs/supply-chain/THREAT_MODEL_GENERATOR.md` | `tests/test_supply_chain_threat_model.py` (+ `tests/fixtures/threat-model/`) | +| **Candidate generation** | `scripts/generate_supply_chain_candidate.py` | `candidates/` (example: `candidates/xklickd-research-reader.json`) | `tests/test_supply_chain_candidate.py` (+ `tests/fixtures/supply_chain_candidate/`) | +| **Promotion gate** | `scripts/run_supply_chain_promotion_gate.py` | `promotion-gate/` (example: `promotion-gate/xklickd-research-reader.gate.json` + `.md`) | `tests/test_supply_chain_promotion_gate.py` | -Each of these is `tool`: a runnable script with a passing test module and deterministic output. "Tool-backed" means the bytes and behaviour exist and are tested — it does **not** imply the end-to-end build runner exists. +Each of these is `tool`: a runnable script with a passing test module and deterministic output. "Tool-backed" means the bytes and behaviour exist and are tested — it does **not** imply the supply chain is complete, that any candidate is a loaded skill, or that a public release exists. 
+ +### Candidate generator scope (read literally) + +`generate_supply_chain_candidate.py` emits the **internal v4.2 target shape** from a config-only `build_request`. Emitting the shape is **not** a claim that every lifecycle stage is implemented or verified — and a generated candidate is **not** a loaded executable skill (it fails the loaded-skill gate below). When domain information is missing, the runner marks `requires_human_premium_pass` rather than inventing competencies, risk, or sources. Sources come **only** from the `build_request` / referenced `source_manifest`. + +### Promotion gate scope (read literally) + +`run_supply_chain_promotion_gate.py` orchestrates the tool-backed checks (threat model always; source/license when a manifest is given; logical diff when a `--before` is given) plus candidate shape checks and forbidden-claim / public-private boundary tripwires. It classifies **ACCEPT / ACCEPT_WITH_REVIEW / BLOCK** and **reports** whether a human premium pass is required — it does **not** run that pass, and makes no compliance/security/benchmark claim. A check that could not run is recorded `not_run` with a reason, never as `pass`. ## Planned stages (specified, not built) | Stage | What it will do | Why it is not claimed yet | |---|---|---| -| **Candidate generation** | Produce a candidate `carrier_pack` from a config-only build request (the build *runner*). | No runner is shipped; the *process* is specified, the executor is not. | -| **Promotion gate** | Pass/fail enforcement that blocks a candidate from being promoted unless all checks pass. | No gate enforces promotion today; checks run, but nothing blocks on them. | -| **Full PII / secrets scanner** | Scan candidate inputs/outputs for PII and secrets beyond the engineering source/license checks. | Current `source-check` is an engineering license/freshness check, **not** a compliance attestation or a PII scanner. 
| -| **Runtime enforcement** | Enforce guardrails in-loop at execution time, not just at build/audit time. | Build-time checks exist; runtime enforcement does not. | +| **Full PII / secrets scanner** | Scan candidate inputs/outputs for PII and secrets beyond the engineering source/license checks. | Current `source-check` is an engineering license/freshness check, **not** a compliance attestation or a PII scanner. The gate's boundary tripwire is a coarse guard, not a full scanner. | +| **Runtime enforcement** | Enforce guardrails in-loop at execution time, not just at build/audit time. | Build-time checks + the promotion gate exist; runtime enforcement does not. | --- diff --git a/.internal-skills/supply-chain/candidates/xklickd-research-reader.json b/.internal-skills/supply-chain/candidates/xklickd-research-reader.json new file mode 100644 index 0000000..b71e930 --- /dev/null +++ b/.internal-skills/supply-chain/candidates/xklickd-research-reader.json @@ -0,0 +1,315 @@ +{ + "audit": { + "audit_trail_stage": "audit_trail_index", + "build_request_hash": "sha256:1f439bb471fc5bc90b95aa685b340becf75a1a578069f9193aad1f62e2315174", + "emits_audit_event_per_output": true + }, + "build_request_hash": "sha256:1f439bb471fc5bc90b95aa685b340becf75a1a578069f9193aad1f62e2315174", + "candidate_hash": "sha256:e6369e8b9b1f91b65d17f68db2fe4df7efa5c9fcbd545db0b8c3bb72e9835ad3", + "candidate_id": "sha256:2dc00bf2ebb3251025b8de1b8bbfcd426f5edc5bb7f245d20d305857ab376bff", + "competency_architecture": { + "competency_core": { + "base_transversal_core": { + "transversal_refs": [ + "ESCO:S1.transversal_thinking", + "ESCO:S2.transversal_collaboration", + "ESCO:S3.transversal_communication", + "LifeComp:Personal.self_regulation", + "LifeComp:Social.cooperation", + "LifeComp:Learning.learning_to_learn", + "DigComp:transversal.responsible_use" + ] + }, + "foundation_competencies": [ + "ESCO:S1.transversal_thinking", + "ESCO:S2.transversal_collaboration", + "ESCO:S3.transversal_communication", + 
"LifeComp:Personal.self_regulation", + "LifeComp:Social.cooperation", + "LifeComp:Learning.learning_to_learn", + "DigComp:transversal.responsible_use" + ], + "note": "Foundation/transversal anchors are framework-referenced structural names, not fabricated domain knowledge.", + "transversal_competencies": [ + "WEF:critical_thinking", + "WEF:problem_solving", + "WEF:creativity", + "WEF:adaptability", + "WEF:ethical_reasoning", + "ESCO:information_literacy", + "ESCO:digital_literacy", + "LifeComp:growth_mindset", + "LifeComp:empathy", + "DigComp:information_evaluation", + "DigComp:data_protection_awareness", + "DigComp:safety" + ] + }, + "domain": "research", + "domain_output_requirements": { + "format": "cited_summary" + }, + "domain_risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + "harmonized_layers": [ + "competency_core", + "primary_domain_competencies", + "secondary_domain_competencies", + "domain_risk_profile", + "domain_output_requirements" + ], + "primary_domain_competencies": [ + "ESCO:research_methods", + "ESCO:academic_writing" + ], + "secondary_domain_competencies": [ + "ESCO:citation_management" + ] + }, + "context_graph": { + "edge_types": [ + "scopes", + "requires", + "creates", + "vetoes", + "audits" + ], + "node_types": [ + "competency", + "memory", + "evidence", + "policy", + "action", + "audit" + ], + "traversed_by_runtime": true + }, + "domain": "research", + "evidence": { + "requires_citations": true, + "source_count": 1, + "sources": [ + { + "category": "academic", + "freshness": "current", + "id": "skos", + "license": "CC-BY-4.0", + "name": "skos-framework", + "published_at": "2025-01-01", + "title": "SKOS Reference", + "url": "https://example.org/skos", + "usage": "reference" + } + ] + }, + "governance": { + "final_decision_owner": "human_operator", + "human_veto_required": true, + "no_auto_external_action": true, + "non_lowerable_rules": [ + "no_stub_as_loaded_skill", + "no_unsourced_claim" + ] + }, + 
"governance_system": { + "_declared_sensitive_actions": [], + "action_gates": [], + "approval_lifecycle": [ + "requested", + "granted", + "denied", + "expired" + ], + "authority_hierarchy": [ + "human_operator", + "human_reviewer", + "agent" + ], + "consent_rules": [], + "escalation_rules": [], + "final_decision_owner": "human_operator", + "governance_audit": { + "emits_to": "audit" + }, + "human_veto": { + "lowerable": false, + "note": "raise-only; not lowerable by an agent", + "required": true + }, + "human_veto_required": true, + "no_auto_external_action": true, + "non_lowerable_rules": [ + "no_stub_as_loaded_skill", + "no_unsourced_claim" + ], + "policy_conflict_resolution": "strictest_rule_wins", + "revocation_rules": [], + "risk_levels": [ + "low", + "medium", + "high", + "critical" + ] + }, + "interactions": { + "canonical_flow": [ + "user_task", + "intent_detection", + "competency_activation", + "memory_retrieval", + "context_graph_traversal", + "evidence_resolution", + "policy_evaluation", + "output_contract_check", + "human_veto_if_required", + "response_or_action", + "audit_event", + "memory_update_candidate" + ], + "flows": [ + "task_to_competency_flow", + "competency_to_memory_flow", + "memory_to_context_graph_flow", + "context_graph_to_policy_flow", + "policy_to_output_contract_flow", + "output_to_audit_flow", + "lifecycle_to_runtime_flow" + ], + "human_veto_if_required_lowerable": false, + "memory_update_is_candidate_only": true + }, + "internal_target": { + "note": "Internal v4.2 target shape. 
NOT a public v4.2 release; public x.klickd artefacts remain v4.1 candidates.", + "public_version": "v4.1", + "track": "xklickd_internal_skill_v4_2" + }, + "kind": "xklickd_internal_candidate_skill", + "memory": { + "promotion_rules": [], + "reads_private_context": false, + "writes_long_term": false + }, + "memory_governance": { + "every_action_influencing_write_is_governed": true, + "every_governance_decision_consulting_memory_is_audited": true, + "role": "bridge_between_memory_and_governance" + }, + "memory_system": { + "promotion_rules": [], + "reads_private_context": false, + "retention": "session_default", + "retrieval": "scoped_by_active_competency", + "write_candidates": "subject_to_memory_governance", + "writes_long_term": false + }, + "metadata": { + "domain": "research", + "publisher": "internal", + "size_tier": "lite", + "skill_id": "xklickd-research-reader", + "status": "candidate", + "title": "Research Reader" + }, + "non_normative": true, + "output_contract": { + "allowed_outputs": [ + "text_response" + ], + "emits_public_output": true, + "forbidden_outputs": [ + "private_to_public", + "unsourced_public_claim" + ], + "graph_bindings": { + "creates_action_node": false, + "may_trigger_veto_edge": true, + "requires_evidence_node": true, + "requires_policy_node": true, + "writes_audit_edge": true + }, + "required_audit_event": true, + "required_citations": true, + "required_handoff_summary": true, + "required_uncertainty_markers": true, + "requires_citations": true + }, + "premium_pass_status": { + "gaps": [], + "note": "Missing domain information is surfaced as a gap, never hallucinated. 
A clean candidate has an empty gaps list.", + "requires_human_premium_pass": false + }, + "risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + "run_id": "sha256:2dc00bf2ebb3251025b8de1b8bbfcd426f5edc5bb7f245d20d305857ab376bff", + "runtime": { + "lifecycle_gates_availability": true, + "loadable_only_if_promoted": true, + "tools": { + "allowed": [ + "read_file" + ], + "forbidden": [ + "publish", + "send_email" + ] + } + }, + "schema_version": "xklickd.candidate.v0.1", + "security": { + "declared_classification": "internal", + "no_real_pii_in_candidate": true, + "no_secrets_in_candidate": true, + "private_public_boundary_guarded": true + }, + "skill_id": "xklickd-research-reader", + "skill_lifecycle": { + "completeness_claimed": false, + "note": "Target lifecycle layout only; not an assertion that every stage is implemented or verified. release_record is an INTERNAL record, never a public tag/DOI/package/release.", + "renamed_from": "supply_chain", + "stages": [ + "build_request", + "source_manifest", + "generated_candidate", + "validation_pipeline", + "audit_trail_index", + "determinism_record", + "logical_diff_report", + "source_license_report", + "threat_model_report", + "benchmark_report", + "premium_pass_report", + "promotion_gate", + "rollback_protocol", + "deprecation_protocol", + "release_record" + ] + }, + "source_manifest": null, + "source_manifest_hash": null, + "sources": [ + { + "category": "academic", + "freshness": "current", + "id": "skos", + "license": "CC-BY-4.0", + "name": "skos-framework", + "published_at": "2025-01-01", + "title": "SKOS Reference", + "url": "https://example.org/skos", + "usage": "reference" + } + ], + "tools": { + "allowed": [ + "read_file" + ], + "forbidden": [ + "publish", + "send_email" + ] + } +} diff --git a/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.json b/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.json new file mode 100644 index 
0000000..1fbeb9d --- /dev/null +++ b/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.json @@ -0,0 +1,117 @@ +{ + "blocking_findings": [], + "candidate_hash": "sha256:e6369e8b9b1f91b65d17f68db2fe4df7efa5c9fcbd545db0b8c3bb72e9835ad3", + "candidate_id": "sha256:2dc00bf2ebb3251025b8de1b8bbfcd426f5edc5bb7f245d20d305857ab376bff", + "candidate_path": ".internal-skills/supply-chain/candidates/xklickd-research-reader.json", + "checks": [ + { + "blocking_findings": [], + "check": "candidate_shape", + "detail": { + "required_layers": [ + "metadata", + "competency_architecture", + "memory_system", + "governance_system", + "memory_governance", + "runtime", + "context_graph", + "interactions", + "evidence", + "security", + "audit", + "skill_lifecycle", + "output_contract" + ] + }, + "review_findings": [], + "verdict": "pass" + }, + { + "blocking_findings": [], + "check": "boundary_tripwires", + "detail": {}, + "review_findings": [], + "verdict": "pass" + }, + { + "blocking_findings": [], + "check": "threat_model", + "detail": { + "deterministic_threat_model_id": "tmid:4465464eef334cd591f8703405c67744", + "summary": { + "blocked": 0, + "by_category": {}, + "by_severity": { + "critical": 0, + "high": 0, + "low": 0, + "medium": 0 + }, + "total": 0 + } + }, + "review_findings": [], + "verdict": "pass" + }, + { + "blocking_findings": [], + "check": "source_license", + "detail": { + "deterministic_report_id": "sha256:10dde9d9942f1d13938bf046be3ac6470a10aad763979315334a7ef1f0a13126", + "summary": { + "allowed": 1, + "blocked": 0, + "review": 0, + "total_sources": 1 + } + }, + "review_findings": [], + "verdict": "pass" + }, + { + "blocking_findings": [], + "check": "logical_diff", + "detail": { + "reason": "no --before candidate provided" + }, + "review_findings": [], + "verdict": "not_run" + }, + { + "blocking_findings": [], + "check": "premium_pass_required", + "detail": { + "gaps": [], + "requires_human_premium_pass": false + }, + "review_findings": [], + 
"verdict": "pass" + } + ], + "claim_boundaries": { + "establishes_legal_compliance": false, + "is_full_automation": false, + "is_security_certification": false, + "note": "The gate orchestrates offline, stdlib-only checks and reports whether a human premium pass is required; it does not run that pass and makes no compliance claim.", + "proves_loaded_executable_skill": false, + "runs_premium_pass": false + }, + "classification": "ACCEPT", + "deterministic_gate_id": "sha256:a45b0325986dc2880411c570254765360037856dce1df783740ac8ec2f1ff9d4", + "kind": "xklickd_supply_chain_promotion_gate_report", + "non_deterministic_zone": { + "evaluated_at": "2026-06-02", + "note": "evaluated_at is excluded from deterministic_gate_id." + }, + "non_normative": true, + "premium_pass_required": false, + "review_findings": [], + "schema_version": "xklickd.promotion_gate.v0.1", + "summary": { + "blocking": 0, + "checks_not_run": 1, + "checks_run": 5, + "review": 0 + } +} diff --git a/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.md b/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.md new file mode 100644 index 0000000..9c81315 --- /dev/null +++ b/.internal-skills/supply-chain/promotion-gate/xklickd-research-reader.gate.md @@ -0,0 +1,19 @@ +# Supply-chain promotion gate — ACCEPT + +- **Candidate:** `sha256:2dc00bf2ebb3251025b8de1b8bbfcd426f5edc5bb7f245d20d305857ab376bff` +- **Gate id:** `sha256:a45b0325986dc2880411c570254765360037856dce1df783740ac8ec2f1ff9d4` +- **Premium pass required:** False +- **Blocking:** 0 · **Review:** 0 · **Checks run:** 5 · **Not run:** 1 + +## Checks + +| Check | Verdict | Blocking | Review | +|---|---|---|---| +| candidate_shape | pass | 0 | 0 | +| boundary_tripwires | pass | 0 | 0 | +| threat_model | pass | 0 | 0 | +| source_license | pass | 0 | 0 | +| logical_diff | not_run | 0 | 0 | +| premium_pass_required | pass | 0 | 0 | + +> NON-NORMATIVE. No release, no compliance claim. 
The gate reports whether a human premium pass is required; it does not run one. diff --git a/scripts/generate_supply_chain_candidate.py b/scripts/generate_supply_chain_candidate.py new file mode 100644 index 0000000..2441599 --- /dev/null +++ b/scripts/generate_supply_chain_candidate.py @@ -0,0 +1,676 @@ +#!/usr/bin/env python3 +"""x.klickd supply-chain — internal candidate skill generator (runner v0.1). + +This is the candidate-generation stage of the documented supply-chain pipeline: +the build *runner* that turns a deterministic, config-only `build_request` into +a candidate skill in the INTERNAL v4.2 target shape described in +docs/internal/INTERNAL_SKILL_V4_2_MAPPING.md. + +NON-NORMATIVE. Internal only. This runner: + - produces NO public release, tag, DOI, package, or deploy; + - does NOT promote any public artefact to v4.2 (public stays v4.1 candidate); + - does NOT run the premium pass — it only marks where one is required; + - does NOT invent sources: every source comes from the build_request or a + referenced source_manifest. Missing domain information is surfaced as a + `requires_human_premium_pass` flag, never hallucinated. + +Anti-mirage contract: + - The runner emits the v4.2 *target shape* (metadata, competency_architecture, + memory_system, governance_system, memory_governance, runtime, context_graph, + interactions, evidence, security, audit, skill_lifecycle, output_contract). + Emitting the shape is NOT a claim that every lifecycle stage is implemented + or verified — that is the promotion gate's job, and it stays honest about + what it has and has not run. + - A candidate is only "complete enough to promote without human premium pass" + when no `requires_human_premium_pass` marker is set. The generator never + fabricates competencies, domain risk, or sources to clear that bar. + +Determinism: + - candidate_id / candidate_hash / run_id are derived ONLY from the canonical + build_request bytes (+ resolved source manifest bytes when referenced). 
+ Identical inputs -> identical ids across runs / hosts / clocks. + - Any clock value (generated_at) is quarantined under non_deterministic_zone + and excluded from every hash. + +CLI: + python scripts/generate_supply_chain_candidate.py --build-request REQ.json + python scripts/generate_supply_chain_candidate.py --build-request REQ.json --out cand.json + +Exit codes: + 0 candidate generated (may carry requires_human_premium_pass markers) + 1 the build_request is structurally invalid (cannot generate honestly) + 2 usage / I-O error +""" +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_OUT_DIR = ( + REPO_ROOT / ".internal-skills" / "supply-chain" / "candidates" +) + +CANDIDATE_SCHEMA_VERSION = "xklickd.candidate.v0.1" +INTERNAL_TARGET_TRACK = "xklickd_internal_skill_v4_2" + +# The 7 foundation (transversal) competency anchors and 12 transversal-flow +# competencies. These are STRUCTURAL anchors (framework-referenced names), not +# fabricated domain knowledge — they are the shared "base transversal core" +# every candidate carries per docs/chimera/V4_1_COMPETENCY_IDENTIFICATION_PROTOCOL.md. +FOUNDATION_COMPETENCIES = ( + "ESCO:S1.transversal_thinking", + "ESCO:S2.transversal_collaboration", + "ESCO:S3.transversal_communication", + "LifeComp:Personal.self_regulation", + "LifeComp:Social.cooperation", + "LifeComp:Learning.learning_to_learn", + "DigComp:transversal.responsible_use", +) +TRANSVERSAL_COMPETENCIES = ( + "WEF:critical_thinking", + "WEF:problem_solving", + "WEF:creativity", + "WEF:adaptability", + "WEF:ethical_reasoning", + "ESCO:information_literacy", + "ESCO:digital_literacy", + "LifeComp:growth_mindset", + "LifeComp:empathy", + "DigComp:information_evaluation", + "DigComp:data_protection_awareness", + "DigComp:safety", +) + +# Harmonised v4.2 competency_architecture sub-layer names (mapping §3). 
# Recorded on every candidate as `competency_architecture.harmonized_layers`.
COMPETENCY_ARCH_LAYERS = (
    "competency_core",
    "primary_domain_competencies",
    "secondary_domain_competencies",
    "domain_risk_profile",
    "domain_output_requirements",
)

# Canonical interactions flow (mapping §5.1). Stored verbatim so the candidate
# records the intended layer-communication contract, not an invented one.
# Emitted in this order as `interactions.canonical_flow`.
CANONICAL_FLOW = (
    "user_task",
    "intent_detection",
    "competency_activation",
    "memory_retrieval",
    "context_graph_traversal",
    "evidence_resolution",
    "policy_evaluation",
    "output_contract_check",
    "human_veto_if_required",
    "response_or_action",
    "audit_event",
    "memory_update_candidate",
)

# Named layer-to-layer flows, emitted as `interactions.flows`.
INTERACTION_FLOWS = (
    "task_to_competency_flow",
    "competency_to_memory_flow",
    "memory_to_context_graph_flow",
    "context_graph_to_policy_flow",
    "policy_to_output_contract_flow",
    "output_to_audit_flow",
    "lifecycle_to_runtime_flow",
)

# skill_lifecycle stages (mapping §6). NOT named "supply_chain".
# Emitted as `skill_lifecycle.stages`; listing a stage here records the target
# layout only and does not assert that the stage is implemented or verified.
SKILL_LIFECYCLE_STAGES = (
    "build_request",
    "source_manifest",
    "generated_candidate",
    "validation_pipeline",
    "audit_trail_index",
    "determinism_record",
    "logical_diff_report",
    "source_license_report",
    "threat_model_report",
    "benchmark_report",
    "premium_pass_report",
    "promotion_gate",
    "rollback_protocol",
    "deprecation_protocol",
    "release_record",
)

# Banned substrings on the generated output: internal-codename leakage and
# unbounded public claims. Mirrors the audit-stage tripwire. NOTE: the internal
# target-track name itself is allowed only under the explicit `internal_target`
# metadata key (it must not leak into any public-facing field), so we do not ban
# it globally here — the promotion gate's boundary tripwire enforces placement.
BANNED_SUBSTRINGS = (
    "chimera",
    "universal standard",
    "automatic gdpr",
    "automatic eu ai act",
    "benchmark superiority",
    "proven benchmark",
)


class BuildRequestError(RuntimeError):
    """Raised when the build_request cannot be honestly turned into a candidate."""


# --- hashing / canonical helpers --------------------------------------------
def _canonical_bytes(obj: Any) -> bytes:
    """Serialize *obj* to key-sorted, whitespace-free JSON encoded as UTF-8."""
    return json.dumps(
        obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False
    ).encode("utf-8")


def _sha256_obj(obj: Any) -> str:
    """Return ``"sha256:<hex>"`` over the canonical JSON bytes of *obj*."""
    return "sha256:" + hashlib.sha256(_canonical_bytes(obj)).hexdigest()


def _sha256_text(text: str) -> str:
    """Return ``"sha256:<hex>"`` over the UTF-8 encoding of *text* as given."""
    return "sha256:" + hashlib.sha256(text.encode("utf-8")).hexdigest()


def _rel(path: Path) -> str:
    """Return *path* relative to REPO_ROOT, or its bare file name if outside it."""
    try:
        return str(path.relative_to(REPO_ROOT))
    except ValueError:
        return path.name


# --- input loading -----------------------------------------------------------
def load_build_request(path: Path) -> tuple[dict[str, Any], str]:
    """Read and parse a build_request file.

    Returns:
        ``(data, text)``: the parsed JSON object and the exact text that was
        read from disk.

    Raises:
        BuildRequestError: the file does not exist, is not valid UTF-8, is not
            valid JSON, or its root is not a JSON object.
        OSError: any other I/O failure while reading the file.
    """
    if not path.exists():
        raise BuildRequestError(f"build_request not found: {path}")
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without this it
        # would bypass every caller-side handler and surface as a traceback.
        raise BuildRequestError(
            f"build_request is not valid UTF-8: {exc}"
        ) from exc
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        raise BuildRequestError(f"build_request is not valid JSON: {exc}") from exc
    if not isinstance(data, dict):
        raise BuildRequestError("build_request root must be a JSON object")
    return data, text


def _resolve_sources(
    request: dict[str, Any], request_path: Path
) -> tuple[list[dict[str, Any]], str | None, str | None]:
    """Resolve declared sources from the request and/or a source_manifest.

    Sources come ONLY from the build_request (inline `sources`) or a referenced
    `source_manifest` file. The runner never adds a source of its own. Inline
    sources come first, manifest sources are appended after them. Returns
    (sources, source_manifest_relpath, source_manifest_hash); the last two are
    None when no manifest is referenced.

    Raises:
        BuildRequestError: `sources` is not a list of objects, or the
            referenced manifest is missing, not valid UTF-8, not valid JSON,
            not a JSON object, or lacks a list of source objects.
    """
    sources: list[dict[str, Any]] = []
    inline = request.get("sources")
    if inline is not None:
        if not isinstance(inline, list):
            raise BuildRequestError("build_request.sources must be a list")
        for i, s in enumerate(inline):
            if not isinstance(s, dict):
                raise BuildRequestError(f"sources[{i}] must be an object")
        sources.extend(inline)

    manifest_rel: str | None = None
    manifest_hash: str | None = None
    ref = request.get("source_manifest")
    if ref:
        # The reference is resolved relative to the build_request's directory.
        manifest_path = request_path.resolve().parent / str(ref)
        if not manifest_path.exists():
            raise BuildRequestError(
                f"referenced source_manifest not found: {ref}"
            )
        try:
            mtext = manifest_path.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            raise BuildRequestError(
                f"source_manifest is not valid UTF-8: {exc}"
            ) from exc
        try:
            mdata = json.loads(mtext)
        except json.JSONDecodeError as exc:
            raise BuildRequestError(
                f"source_manifest is not valid JSON: {exc}"
            ) from exc
        # A list/scalar root has no .get(); reject it as a structural error
        # instead of letting an AttributeError escape.
        if not isinstance(mdata, dict):
            raise BuildRequestError("source_manifest root must be a JSON object")
        msources = mdata.get("sources")
        if not isinstance(msources, list):
            raise BuildRequestError("source_manifest.sources must be a list")
        for i, s in enumerate(msources):
            if not isinstance(s, dict):
                raise BuildRequestError(
                    f"source_manifest.sources[{i}] must be an object"
                )
        sources.extend(msources)
        manifest_rel = _rel(manifest_path)
        # Hash the manifest text exactly as read, not a re-serialization.
        manifest_hash = _sha256_text(mtext)

    return sources, manifest_rel, manifest_hash


# --- candidate assembly ------------------------------------------------------
def _gap(reason: str) -> dict[str, Any]:
    """Build the marker that stands in for information the request omitted."""
    return {"requires_human_premium_pass": True, "reason": reason}


def _object_section(request: dict[str, Any], key: str) -> dict[str, Any]:
    """Return the optional object-valued section *key* of the request.

    A missing or empty section yields ``{}``. A non-empty value that is not a
    JSON object is a structural error in the request.

    Raises:
        BuildRequestError: the section is present, non-empty, and not an object.
    """
    value = request.get(key)
    if not value:
        return {}
    if not isinstance(value, dict):
        raise BuildRequestError(
            f"build_request.{key} must be an object when provided"
        )
    return value


def _competency_architecture(
    request: dict[str, Any], gaps: list[str]
) -> dict[str, Any]:
    """Assemble the competency_architecture layer.

    Domain-specific parts (primary competencies, risk profile, output
    requirements) are copied from the request. Each one that is missing is
    replaced by a `_gap` marker and its path is appended to *gaps*, which is
    mutated in place.

    Raises:
        BuildRequestError: primary or secondary domain competencies are
            provided but are not lists.
    """
    domain = request.get("domain")
    primary = request.get("primary_domain_competencies")
    secondary = request.get("secondary_domain_competencies") or []

    # Domain competencies MUST come from the request. We never invent them.
    if not primary:
        gaps.append("competency_architecture.primary_domain_competencies")
        primary_block: Any = _gap(
            "no primary domain competencies declared in build_request; "
            "domain expertise must be supplied by a human premium pass, "
            "not generated"
        )
    else:
        if not isinstance(primary, list):
            raise BuildRequestError(
                "primary_domain_competencies must be a list when provided"
            )
        primary_block = list(primary)

    if not isinstance(secondary, list):
        raise BuildRequestError(
            "secondary_domain_competencies must be a list when provided"
        )

    domain_risk = request.get("domain_risk_profile")
    if not domain_risk:
        gaps.append("competency_architecture.domain_risk_profile")
        risk_block: Any = _gap(
            "no domain risk profile declared; domain risk must be assessed by "
            "a human premium pass"
        )
    else:
        risk_block = domain_risk

    domain_out = request.get("domain_output_requirements")
    if not domain_out:
        gaps.append("competency_architecture.domain_output_requirements")
        out_block: Any = _gap(
            "no domain output requirements declared; must be specified by a "
            "human premium pass"
        )
    else:
        out_block = domain_out

    return {
        "competency_core": {
            "foundation_competencies": list(FOUNDATION_COMPETENCIES),
            "transversal_competencies": list(TRANSVERSAL_COMPETENCIES),
            "base_transversal_core": {
                "transversal_refs": list(FOUNDATION_COMPETENCIES),
            },
            "note": (
                "Foundation/transversal anchors are framework-referenced "
                "structural names, not fabricated domain knowledge."
            ),
        },
        "primary_domain_competencies": primary_block,
        "secondary_domain_competencies": list(secondary),
        "domain_risk_profile": risk_block,
        "domain_output_requirements": out_block,
        "harmonized_layers": list(COMPETENCY_ARCH_LAYERS),
        "domain": domain,
    }


def _governance_system(request: dict[str, Any]) -> dict[str, Any]:
    """Governance defaults to the strictest safe posture.

    Where the request under-specifies, we DEFAULT TO SAFE (veto required, no
    auto external action, human final owner) rather than guessing a permissive
    setting. A request may tighten but the runner never loosens below the floor.

    Raises:
        BuildRequestError: `governance` or `risk_profile` is provided but is
            not an object.
    """
    g = _object_section(request, "governance")
    sensitive = list(
        _object_section(request, "risk_profile").get("sensitive_actions") or []
    )
    return {
        "authority_hierarchy": g.get("authority_hierarchy")
        or ["human_operator", "human_reviewer", "agent"],
        # The veto block is fixed; nothing in the request can change it.
        "human_veto": {
            "required": True,
            "lowerable": False,
            "note": "raise-only; not lowerable by an agent",
        },
        "consent_rules": g.get("consent_rules") or [],
        "risk_levels": g.get("risk_levels")
        or ["low", "medium", "high", "critical"],
        "action_gates": g.get("action_gates") or [],
        # Request rules are merged with the two built-in rules, de-duplicated
        # and sorted so the output is order-independent.
        "non_lowerable_rules": sorted(set(
            list(g.get("non_lowerable_rules") or [])
            + ["no_unsourced_claim", "no_stub_as_loaded_skill"]
        )),
        "escalation_rules": g.get("escalation_rules") or [],
        "approval_lifecycle": ["requested", "granted", "denied", "expired"],
        "revocation_rules": g.get("revocation_rules") or [],
        "policy_conflict_resolution": g.get("policy_conflict_resolution")
        or "strictest_rule_wins",
        "governance_audit": {"emits_to": "audit"},
        # Flat mirror consumed by the threat-model tool (classify()).
        "human_veto_required": True,
        "no_auto_external_action": True,
        "final_decision_owner": "human_operator",
        "_declared_sensitive_actions": sensitive,
    }


def _memory_system(request: dict[str, Any]) -> dict[str, Any]:
    """Assemble the memory_system layer from the request's `memory` section.

    When long-term writes are declared and no promotion rules are given, the
    rules default to requiring human review.

    Raises:
        BuildRequestError: `memory` is provided but is not an object.
    """
    m = _object_section(request, "memory")
    writes_long_term = bool(m.get("writes_long_term"))
    return {
        "retrieval": m.get("retrieval") or "scoped_by_active_competency",
        "write_candidates": "subject_to_memory_governance",
        "retention": m.get("retention") or "session_default",
        # Flat mirror for the threat-model tool.
        "writes_long_term": writes_long_term,
        "reads_private_context": bool(m.get("reads_private_context")),
        "promotion_rules": m.get("promotion_rules")
        or (["human_review_required"] if writes_long_term else []),
    }


def _memory_governance() -> dict[str, Any]:
    """Return the fixed memory_governance layer (no request input)."""
    return {
        "role": "bridge_between_memory_and_governance",
        "every_action_influencing_write_is_governed": True,
        "every_governance_decision_consulting_memory_is_audited": True,
    }


def _runtime(request: dict[str, Any]) -> dict[str, Any]:
    """Assemble the runtime layer from the request's `tools` section.

    `publish` and `send_email` are always added to the forbidden list.

    Raises:
        BuildRequestError: `tools` is provided but is not an object.
    """
    tools = _object_section(request, "tools")
    return {
        "loadable_only_if_promoted": True,
        "lifecycle_gates_availability": True,
        "tools": {
            "allowed": list(tools.get("allowed") or []),
            "forbidden": sorted(set(
                list(tools.get("forbidden") or [])
                + ["publish", "send_email"]
            )),
        },
    }


def _context_graph() -> dict[str, Any]:
    """Return the fixed context_graph layer (no request input)."""
    return {
        "node_types": [
            "competency", "memory", "evidence", "policy", "action", "audit",
        ],
        "edge_types": ["scopes", "requires", "creates", "vetoes", "audits"],
        "traversed_by_runtime": True,
    }


def _interactions() -> dict[str, Any]:
    """Return the fixed interactions layer built from the module-level flows."""
    return {
        "flows": list(INTERACTION_FLOWS),
        "canonical_flow": list(CANONICAL_FLOW),
        "human_veto_if_required_lowerable": False,
        "memory_update_is_candidate_only": True,
    }


def _evidence(sources: list[dict[str, Any]], gaps: list[str]) -> dict[str, Any]:
    """Assemble the evidence layer from the resolved sources.

    With no sources, the layer carries a `_gap` status and "evidence.sources"
    is appended to *gaps* (mutated in place).
    """
    if not sources:
        gaps.append("evidence.sources")
        return {
            "sources": [],
            "requires_citations": True,
            "status": _gap(
                "no sources declared in build_request or source_manifest; "
                "evidence must be supplied, not invented"
            ),
        }
    return {
        "sources": list(sources),
        "requires_citations": True,
        "source_count": len(sources),
    }


def _security(request: dict[str, Any]) -> dict[str, Any]:
    """Assemble the security layer; classification defaults to "internal"."""
    return {
        "no_secrets_in_candidate": True,
        "no_real_pii_in_candidate": True,
        "private_public_boundary_guarded": True,
        "declared_classification": request.get("classification") or "internal",
    }
+def _audit(build_request_hash: str) -> dict[str, Any]: + return { + "build_request_hash": build_request_hash, + "emits_audit_event_per_output": True, + "audit_trail_stage": "audit_trail_index", + } + + +def _skill_lifecycle() -> dict[str, Any]: + # Records the TARGET lifecycle layout. Stage presence here is structural; + # it is NOT an assertion that each stage is implemented/verified. + return { + "stages": list(SKILL_LIFECYCLE_STAGES), + "renamed_from": "supply_chain", + "completeness_claimed": False, + "note": ( + "Target lifecycle layout only; not an assertion that every stage " + "is implemented or verified. release_record is an INTERNAL record, " + "never a public tag/DOI/package/release." + ), + } + + +def _output_contract(request: dict[str, Any]) -> dict[str, Any]: + oc = request.get("output_contract") or {} + return { + "allowed_outputs": list(oc.get("allowed_outputs") or ["text_response"]), + "forbidden_outputs": sorted(set( + list(oc.get("forbidden_outputs") or []) + + ["unsourced_public_claim", "private_to_public"] + )), + "required_citations": True, + "required_uncertainty_markers": True, + "required_handoff_summary": True, + "required_audit_event": True, + "graph_bindings": { + "creates_action_node": bool(oc.get("creates_action_node")), + "requires_policy_node": True, + "requires_evidence_node": True, + "may_trigger_veto_edge": True, + "writes_audit_edge": True, + }, + # Flat mirror for the threat-model tool. + "requires_citations": True, + "emits_public_output": bool(oc.get("emits_public_output")), + } + + +def build_candidate( + request: dict[str, Any], + request_text: str, + request_path: Path, +) -> dict[str, Any]: + """Build the candidate skill in the v4.2 internal target shape. + + Deterministic given (request bytes + resolved source manifest bytes). 
+ """ + skill_id = request.get("skill_id") + if not skill_id or not isinstance(skill_id, str): + raise BuildRequestError("build_request.skill_id (string) is required") + if not request.get("domain"): + raise BuildRequestError("build_request.domain is required") + + sources, manifest_rel, manifest_hash = _resolve_sources(request, request_path) + + build_request_hash = _sha256_text(request_text) + # Determinism anchor: id derived ONLY from canonical request + manifest hash. + id_material = { + "skill_id": skill_id, + "build_request_hash": build_request_hash, + "source_manifest_hash": manifest_hash, + } + candidate_id = _sha256_obj(id_material) + run_id = candidate_id # one run == one deterministic candidate id + + gaps: list[str] = [] + + candidate: dict[str, Any] = { + "schema_version": CANDIDATE_SCHEMA_VERSION, + "kind": "xklickd_internal_candidate_skill", + "non_normative": True, + "internal_target": { + "track": INTERNAL_TARGET_TRACK, + "public_version": "v4.1", + "note": ( + "Internal v4.2 target shape. NOT a public v4.2 release; public " + "x.klickd artefacts remain v4.1 candidates." 
+ ), + }, + "skill_id": skill_id, + "domain": request.get("domain"), + "candidate_id": candidate_id, + "run_id": run_id, + "build_request_hash": build_request_hash, + "source_manifest": manifest_rel, + "source_manifest_hash": manifest_hash, + # --- v4.2 target top-level layers (mapping §1) --- + "metadata": { + "skill_id": skill_id, + "domain": request.get("domain"), + "title": request.get("title") or skill_id, + "size_tier": request.get("size_tier") or "lite", + "publisher": request.get("publisher") or "internal", + "status": "candidate", + }, + "competency_architecture": _competency_architecture(request, gaps), + "memory_system": _memory_system(request), + "governance_system": _governance_system(request), + "memory_governance": _memory_governance(), + "runtime": _runtime(request), + "context_graph": _context_graph(), + "interactions": _interactions(), + "evidence": _evidence(sources, gaps), + "security": _security(request), + "audit": _audit(build_request_hash), + "skill_lifecycle": _skill_lifecycle(), + "output_contract": _output_contract(request), + # --- threat-model-compatible flat mirrors (classify() reads these) --- + "risk_profile": request.get("risk_profile") + or {"default_risk": "low", "sensitive_actions": []}, + "governance": None, # filled below from governance_system mirror + "memory": None, # filled below from memory_system mirror + "tools": None, # filled below from runtime mirror + "sources": list(sources), + } + + # Wire the flat mirrors the threat-model tool consumes, derived (not + # duplicated by hand) from the structured layers above. 
+ gov = candidate["governance_system"] + candidate["governance"] = { + "human_veto_required": gov["human_veto_required"], + "no_auto_external_action": gov["no_auto_external_action"], + "final_decision_owner": gov["final_decision_owner"], + "non_lowerable_rules": list(gov["non_lowerable_rules"]), + } + mem = candidate["memory_system"] + candidate["memory"] = { + "writes_long_term": mem["writes_long_term"], + "reads_private_context": mem["reads_private_context"], + "promotion_rules": list(mem["promotion_rules"]), + } + candidate["tools"] = { + "allowed": list(candidate["runtime"]["tools"]["allowed"]), + "forbidden": list(candidate["runtime"]["tools"]["forbidden"]), + } + + # Anti-mirage summary: surface (not hide) any premium-pass requirements. + candidate["premium_pass_status"] = { + "requires_human_premium_pass": bool(gaps), + "gaps": sorted(gaps), + "note": ( + "Missing domain information is surfaced as a gap, never " + "hallucinated. A clean candidate has an empty gaps list." + ), + } + + # candidate_hash over the deterministic core (no clock zone yet present). 
+ candidate["candidate_hash"] = _sha256_obj(candidate) + return candidate + + +def render(candidate: dict[str, Any]) -> str: + return json.dumps(candidate, indent=2, sort_keys=True, ensure_ascii=False) + "\n" + + +def _scan_banned(text: str) -> list[str]: + low = text.lower() + return [s for s in BANNED_SUBSTRINGS if s in low] + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Internal x.klickd supply-chain candidate generator " + "(v4.2 target shape, non-normative).", + ) + parser.add_argument( + "--build-request", required=True, + help="path to a deterministic build_request JSON", + ) + parser.add_argument( + "--out", default=None, + help="output path for the candidate JSON (default: " + ".internal-skills/supply-chain/candidates/.json)", + ) + parser.add_argument( + "--quiet", action="store_true", + help="do not print the candidate to stdout", + ) + args = parser.parse_args(argv if argv is not None else sys.argv[1:]) + + req_path = Path(args.build_request) + try: + request, request_text = load_build_request(req_path) + candidate = build_candidate(request, request_text, req_path) + except BuildRequestError as exc: + print(f"FAIL (build_request): {exc}", file=sys.stderr) + return 1 + except OSError as exc: + print(f"FAIL (io): {exc}", file=sys.stderr) + return 2 + + serialized = render(candidate) + + banned = _scan_banned(serialized) + if banned: + print(f"FAIL: banned substring(s) in candidate: {banned}", file=sys.stderr) + return 1 + + if args.out: + out_path = Path(args.out) + else: + out_path = DEFAULT_OUT_DIR / f"{candidate['skill_id']}.json" + try: + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(serialized, encoding="utf-8") + except OSError as exc: + print(f"FAIL (io): {exc}", file=sys.stderr) + return 2 + + if not args.quiet: + sys.stdout.write(serialized) + print( + f"OK: candidate {candidate['skill_id']} (id {candidate['candidate_id']}, " + f"premium_pass_required=" + 
f"{candidate['premium_pass_status']['requires_human_premium_pass']}) " + f"-> {_rel(out_path)}", + file=sys.stderr, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/run_supply_chain_promotion_gate.py b/scripts/run_supply_chain_promotion_gate.py new file mode 100644 index 0000000..044488d --- /dev/null +++ b/scripts/run_supply_chain_promotion_gate.py @@ -0,0 +1,565 @@ +#!/usr/bin/env python3 +"""x.klickd supply-chain — combined promotion gate (v0.1). + +The promotion gate is the pass/fail orchestrator for the documented +supply-chain pipeline. It runs the existing tool-backed checks on a candidate +skill, applies candidate schema/shape checks and public/private boundary +tripwires, and classifies the candidate as: + + ACCEPT -- no blocking findings, no premium-pass requirement + ACCEPT_WITH_REVIEW -- no blocking findings, but human review/premium pass + is required (gaps, review-bucket sources, mediums) + BLOCK -- at least one blocking finding + +NON-NORMATIVE. Internal only. The gate: + - does NOT run the premium pass; it only REPORTS whether one is required; + - makes NO compliance, legal, security-certification, or benchmark claim; + - produces NO release, tag, DOI, package, or deploy; + - asserts a check result ONLY when it actually ran the check. A check that + could not run is recorded as "not_run" with a reason, never as "pass". 
+ +Orchestrated checks (each is run only if its tool is importable on this branch): + - threat model (scripts/generate_supply_chain_threat_model.py) + - source/license (scripts/check_supply_chain_sources.py) when a source + manifest is provided + - logical diff (scripts/generate_supply_chain_diff.py) when --before is + provided + - candidate schema/shape checks (built in here) + - forbidden-claims / public-private boundary tripwires (built in here) + +Determinism: + - The gate report's deterministic_gate_id is derived only from the candidate + hash plus the sorted, normalized check verdicts. Clock values live under + non_deterministic_zone and are excluded from the id. + +CLI: + python scripts/run_supply_chain_promotion_gate.py --candidate CAND.json + python scripts/run_supply_chain_promotion_gate.py --candidate CAND.json \ + --source-manifest SRC.json --before PREV.json --out gate.json --md gate.md + +Exit codes: + 0 ACCEPT or ACCEPT_WITH_REVIEW (acceptable) + 1 BLOCK (a blocking finding was raised) + 2 usage / I-O error +""" +from __future__ import annotations + +import argparse +import datetime as _dt +import hashlib +import importlib.util +import json +import sys +from pathlib import Path +from types import ModuleType +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPTS_DIR = REPO_ROOT / "scripts" +DEFAULT_OUT_DIR = ( + REPO_ROOT / ".internal-skills" / "supply-chain" / "promotion-gate" +) + +GATE_SCHEMA_VERSION = "xklickd.promotion_gate.v0.1" + +# v4.2 internal target top-level layers a candidate MUST carry (mapping §1). +REQUIRED_CANDIDATE_LAYERS = ( + "metadata", + "competency_architecture", + "memory_system", + "governance_system", + "memory_governance", + "runtime", + "context_graph", + "interactions", + "evidence", + "security", + "audit", + "skill_lifecycle", + "output_contract", +) + +# Forbidden public claims (over-claim tripwire). Lower-cased substring match. 
+FORBIDDEN_CLAIM_SUBSTRINGS = ( + "universal standard", + "automatic gdpr", + "automatic eu ai act", + "automatically gdpr compliant", + "guaranteed compliance", + "benchmark superiority", + "proven benchmark", + "industry standard for all", +) + +# Internal codename that must never appear anywhere in a candidate. +FORBIDDEN_CODENAME_SUBSTRINGS = ( + "chimera", +) + +# The internal target-track name is allowed ONLY under the candidate's +# `internal_target` block; anywhere else is a leak of an internal name into a +# field that could surface publicly. +INTERNAL_TRACK_NAME = "xklickd_internal_skill_v4_2" + + +def _load_module(name: str, filename: str) -> ModuleType | None: + """Import a sibling script as a module, or return None if unavailable.""" + path = SCRIPTS_DIR / filename + if not path.exists(): + return None + spec = importlib.util.spec_from_file_location(name, path) + if not spec or not spec.loader: + return None + mod = importlib.util.module_from_spec(spec) + try: + spec.loader.exec_module(mod) + except Exception: # noqa: BLE001 - a broken tool must not crash the gate + return None + return mod + + +def _check( + name: str, + verdict: str, + blocking: list[str], + review: list[str], + detail: dict[str, Any] | None = None, +) -> dict[str, Any]: + """A single normalized check result. + + verdict in: pass | review | block | not_run + """ + return { + "check": name, + "verdict": verdict, + "blocking_findings": sorted(blocking), + "review_findings": sorted(review), + "detail": detail or {}, + } + + +def _not_run(name: str, reason: str) -> dict[str, Any]: + """A check that did not run. Its reason is a note, NOT a review finding. + + Recording the reason as a review finding would conflate "we skipped this" + with "a reviewer must look at this" and inflate the review count. A not_run + check contributes neither blocking nor review findings to the verdict. 
+ """ + return { + "check": name, + "verdict": "not_run", + "blocking_findings": [], + "review_findings": [], + "detail": {"reason": reason}, + } + + +# --- individual checks ------------------------------------------------------- +def check_candidate_shape(candidate: dict[str, Any]) -> dict[str, Any]: + blocking: list[str] = [] + review: list[str] = [] + + if not isinstance(candidate, dict): + return _check("candidate_shape", "block", + ["candidate is not a JSON object"], []) + + for layer in REQUIRED_CANDIDATE_LAYERS: + if layer not in candidate: + blocking.append(f"missing required v4.2 layer: {layer}") + + if not candidate.get("skill_id"): + blocking.append("missing skill_id") + if not candidate.get("candidate_hash"): + review.append("missing candidate_hash (cannot anchor determinism)") + + # skill_lifecycle must not be literally named supply_chain and must not + # claim completeness. + sl = candidate.get("skill_lifecycle") or {} + if isinstance(sl, dict): + if sl.get("completeness_claimed") is True: + blocking.append( + "skill_lifecycle.completeness_claimed is true (supply-chain " + "completeness must not be claimed)" + ) + if str(sl.get("name", "")).lower() == "supply_chain": + blocking.append("skill_lifecycle must not be named 'supply_chain'") + + # output_contract.graph_bindings must be present (mapping §7). + oc = candidate.get("output_contract") or {} + if isinstance(oc, dict) and "graph_bindings" not in oc: + blocking.append("output_contract.graph_bindings missing") + + # Governance floor: human veto must be required and not lowerable. 
+ gov = candidate.get("governance_system") or {} + hv = gov.get("human_veto") if isinstance(gov, dict) else None + if isinstance(hv, dict): + if not hv.get("required"): + blocking.append("governance_system.human_veto.required is not true") + if hv.get("lowerable") is True: + blocking.append("governance_system.human_veto is lowerable") + + verdict = "block" if blocking else ("review" if review else "pass") + return _check("candidate_shape", verdict, blocking, review, + {"required_layers": list(REQUIRED_CANDIDATE_LAYERS)}) + + +def check_boundary_tripwires(candidate_text: str, + candidate: dict[str, Any]) -> dict[str, Any]: + """Forbidden public claims + internal-name / private->public leakage.""" + blocking: list[str] = [] + review: list[str] = [] + low = candidate_text.lower() + + for sub in FORBIDDEN_CLAIM_SUBSTRINGS: + if sub in low: + blocking.append(f"forbidden public claim: {sub!r}") + + for sub in FORBIDDEN_CODENAME_SUBSTRINGS: + if sub in low: + blocking.append(f"internal codename leak: {sub!r}") + + # Internal track name is allowed only inside the internal_target block. + if INTERNAL_TRACK_NAME.lower() in low: + it = candidate.get("internal_target") + it_text = json.dumps(it, sort_keys=True).lower() if it else "" + # Count occurrences outside the internal_target block. + total = low.count(INTERNAL_TRACK_NAME.lower()) + inside = it_text.count(INTERNAL_TRACK_NAME.lower()) + if total > inside: + blocking.append( + "internal track name leaks outside internal_target block" + ) + + # Explicit private->public leak flags (mirrors threat-model contract). + if candidate.get("private_public_leak"): + blocking.append("candidate declares a private->public leak") + boundaries = candidate.get("boundaries") or {} + if isinstance(boundaries, dict) and boundaries.get("private_to_public_leak"): + blocking.append("boundaries.private_to_public_leak is set") + + # Public v4.2 over-claim: a candidate must not claim public v4.2. 
+ it = candidate.get("internal_target") or {} + if isinstance(it, dict) and str(it.get("public_version", "")).startswith("v4.2"): + blocking.append("candidate claims public_version v4.2 (public stays v4.1)") + + verdict = "block" if blocking else ("review" if review else "pass") + return _check("boundary_tripwires", verdict, blocking, review) + + +def check_threat_model(candidate: dict[str, Any], candidate_path: str, + mod: ModuleType | None) -> dict[str, Any]: + if mod is None: + return _not_run("threat_model", + "threat-model tool not importable on this branch") + try: + report = mod.build_report(candidate, candidate_path) + except Exception as exc: # noqa: BLE001 + return _not_run("threat_model", f"threat-model tool errored: {exc}") + blocked = list(report.get("blocked_findings") or []) + mediums = report.get("summary", {}).get("by_severity", {}).get("medium", 0) + review = [f"{mediums} medium finding(s) to review"] if mediums else [] + verdict = "block" if blocked else ("review" if review else "pass") + return _check( + "threat_model", verdict, + [f"threat:{t}" for t in blocked], review, + {"deterministic_threat_model_id": + report.get("deterministic_threat_model_id"), + "summary": report.get("summary")}, + ) + + +def check_source_license(source_manifest: Path | None, + eval_date: _dt.date, + mod: ModuleType | None) -> dict[str, Any]: + if source_manifest is None: + return _not_run("source_license", "no --source-manifest provided") + if mod is None: + return _not_run("source_license", + "source-check tool not importable on this branch") + try: + manifest, text = mod.load_manifest(source_manifest) + report = mod.build_report(manifest, text, source_manifest, eval_date, 3) + except Exception as exc: # noqa: BLE001 + return _not_run("source_license", f"source-check could not run: {exc}") + summary = report.get("summary", {}) + blocked_n = summary.get("blocked", 0) + review_n = summary.get("review", 0) + blocking = [ + f"source:{f['id']}" for f in 
report.get("blocked_findings") or [] + ] + review = [f"{review_n} source(s) need review"] if review_n else [] + verdict = "block" if blocked_n else ("review" if review_n else "pass") + return _check("source_license", verdict, blocking, review, + {"deterministic_report_id": + report.get("deterministic_report_id"), + "summary": summary}) + + +def check_logical_diff(before: Path | None, candidate_path: Path, + mod: ModuleType | None) -> dict[str, Any]: + if before is None: + return _not_run("logical_diff", "no --before candidate provided") + if mod is None: + return _not_run("logical_diff", + "diff tool not importable on this branch") + try: + report = mod.build_report(before, candidate_path) + except Exception as exc: # noqa: BLE001 + return _not_run("logical_diff", f"diff tool could not run: {exc}") + blocked = report.get("blocked_findings") or [] + high = report.get("high_risk_findings") or [] + blocking = [f"diff:{f.get('path')}:{f.get('kind')}" for f in blocked] + review = [f"{len(high)} high-risk diff finding(s)"] if high else [] + verdict = "block" if blocked else ("review" if high else "pass") + return _check("logical_diff", verdict, blocking, review, + {"deterministic_diff_id": report.get("deterministic_diff_id"), + "summary": report.get("summary")}) + + +def check_premium_pass(candidate: dict[str, Any]) -> dict[str, Any]: + """Report (never run) whether a human premium pass is required.""" + status = candidate.get("premium_pass_status") or {} + gaps = list(status.get("gaps") or []) + requires = bool(status.get("requires_human_premium_pass")) or bool(gaps) + review = ( + [f"premium pass required for gap: {g}" for g in gaps] + if requires else [] + ) + # A premium-pass requirement is NOT blocking; it routes to ACCEPT_WITH_REVIEW. 
+ verdict = "review" if requires else "pass" + return _check("premium_pass_required", verdict, [], review, + {"requires_human_premium_pass": requires, "gaps": sorted(gaps)}) + + +# --- orchestration ----------------------------------------------------------- +def _deterministic_gate_id(candidate_hash: str | None, + checks: list[dict[str, Any]]) -> str: + normalized = [ + { + "check": c["check"], + "verdict": c["verdict"], + "blocking_findings": c["blocking_findings"], + "review_findings": c["review_findings"], + } + for c in sorted(checks, key=lambda c: c["check"]) + ] + payload = json.dumps( + {"candidate_hash": candidate_hash, "checks": normalized}, + sort_keys=True, separators=(",", ":"), + ) + return "sha256:" + hashlib.sha256(payload.encode("utf-8")).hexdigest() + + +def run_gate( + candidate: dict[str, Any], + candidate_text: str, + candidate_path: Path, + source_manifest: Path | None, + before: Path | None, + eval_date: _dt.date, +) -> dict[str, Any]: + threat_mod = _load_module( + "sc_threat_model", "generate_supply_chain_threat_model.py" + ) + source_mod = _load_module( + "sc_source_check", "check_supply_chain_sources.py" + ) + diff_mod = _load_module("sc_diff", "generate_supply_chain_diff.py") + + checks: list[dict[str, Any]] = [ + check_candidate_shape(candidate), + check_boundary_tripwires(candidate_text, candidate), + check_threat_model(candidate, str(candidate_path), threat_mod), + check_source_license(source_manifest, eval_date, source_mod), + check_logical_diff(before, candidate_path, diff_mod), + check_premium_pass(candidate), + ] + + any_block = any(c["verdict"] == "block" for c in checks) + any_review = any(c["verdict"] == "review" for c in checks) + + if any_block: + classification = "BLOCK" + elif any_review: + classification = "ACCEPT_WITH_REVIEW" + else: + classification = "ACCEPT" + + all_blocking = sorted( + f for c in checks for f in c["blocking_findings"] + ) + all_review = sorted( + f for c in checks for f in c["review_findings"] + ) + + 
return { + "schema_version": GATE_SCHEMA_VERSION, + "kind": "xklickd_supply_chain_promotion_gate_report", + "non_normative": True, + "candidate_path": str(candidate_path), + "candidate_id": candidate.get("candidate_id"), + "candidate_hash": candidate.get("candidate_hash"), + "classification": classification, + "deterministic_gate_id": _deterministic_gate_id( + candidate.get("candidate_hash"), checks + ), + "summary": { + "checks_run": sum(1 for c in checks if c["verdict"] != "not_run"), + "checks_not_run": sum(1 for c in checks if c["verdict"] == "not_run"), + "blocking": len(all_blocking), + "review": len(all_review), + }, + "checks": checks, + "blocking_findings": all_blocking, + "review_findings": all_review, + "premium_pass_required": any( + c["check"] == "premium_pass_required" + and c["detail"].get("requires_human_premium_pass") + for c in checks + ), + "claim_boundaries": { + "is_security_certification": False, + "establishes_legal_compliance": False, + "is_full_automation": False, + "proves_loaded_executable_skill": False, + "runs_premium_pass": False, + "note": ("The gate orchestrates offline, stdlib-only checks and " + "reports whether a human premium pass is required; it " + "does not run that pass and makes no compliance claim."), + }, + "non_deterministic_zone": { + "evaluated_at": eval_date.isoformat(), + "note": ("evaluated_at is excluded from deterministic_gate_id."), + }, + } + + +def render_json(report: dict[str, Any]) -> str: + return json.dumps(report, indent=2, sort_keys=True, ensure_ascii=False) + "\n" + + +def render_md(report: dict[str, Any]) -> str: + lines = [ + f"# Supply-chain promotion gate — {report['classification']}", + "", + f"- **Candidate:** `{report.get('candidate_id')}`", + f"- **Gate id:** `{report['deterministic_gate_id']}`", + f"- **Premium pass required:** {report['premium_pass_required']}", + f"- **Blocking:** {report['summary']['blocking']} · " + f"**Review:** {report['summary']['review']} · " + f"**Checks run:** 
{report['summary']['checks_run']} · " + f"**Not run:** {report['summary']['checks_not_run']}", + "", + "## Checks", + "", + "| Check | Verdict | Blocking | Review |", + "|---|---|---|---|", + ] + for c in report["checks"]: + lines.append( + f"| {c['check']} | {c['verdict']} | " + f"{len(c['blocking_findings'])} | {len(c['review_findings'])} |" + ) + if report["blocking_findings"]: + lines += ["", "## Blocking findings", ""] + lines += [f"- {f}" for f in report["blocking_findings"]] + if report["review_findings"]: + lines += ["", "## Review findings", ""] + lines += [f"- {f}" for f in report["review_findings"]] + lines += [ + "", + "> NON-NORMATIVE. No release, no compliance claim. The gate reports " + "whether a human premium pass is required; it does not run one.", + "", + ] + return "\n".join(lines) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Combined x.klickd supply-chain promotion gate " + "(non-normative).", + ) + parser.add_argument("--candidate", required=True, + help="path to candidate skill JSON") + parser.add_argument("--source-manifest", default=None, + help="optional source manifest for the source/license check") + parser.add_argument("--before", default=None, + help="optional prior candidate JSON for a logical diff") + parser.add_argument("--out", default=None, + help="path to write the gate report JSON " + "(default: .internal-skills/supply-chain/" + "promotion-gate/.gate.json)") + parser.add_argument("--md", default=None, + help="optional path to write a Markdown summary") + parser.add_argument("--eval-date", default=None, + help="ISO date for source freshness math " + "(default: today UTC)") + parser.add_argument("--quiet", action="store_true", + help="do not print the report JSON to stdout") + args = parser.parse_args(argv if argv is not None else sys.argv[1:]) + + cand_path = Path(args.candidate) + if not cand_path.exists(): + print(f"error: candidate not found: {cand_path}", 
file=sys.stderr) + return 2 + candidate_text = cand_path.read_text(encoding="utf-8") + try: + candidate = json.loads(candidate_text) + except json.JSONDecodeError as exc: + print(f"error: candidate JSON parse failed: {exc}", file=sys.stderr) + return 2 + if not isinstance(candidate, dict): + print("error: candidate root must be a JSON object", file=sys.stderr) + return 2 + + source_manifest = Path(args.source_manifest) if args.source_manifest else None + if source_manifest is not None and not source_manifest.exists(): + print(f"error: source manifest not found: {source_manifest}", + file=sys.stderr) + return 2 + before = Path(args.before) if args.before else None + if before is not None and not before.exists(): + print(f"error: --before not found: {before}", file=sys.stderr) + return 2 + + if args.eval_date: + try: + eval_date = _dt.date.fromisoformat(args.eval_date[:10]) + except ValueError: + print(f"error: invalid --eval-date: {args.eval_date}", file=sys.stderr) + return 2 + else: + eval_date = _dt.datetime.now(_dt.timezone.utc).date() + + report = run_gate( + candidate, candidate_text, cand_path, source_manifest, before, eval_date + ) + + serialized = render_json(report) + if args.out: + out_path = Path(args.out) + else: + out_path = DEFAULT_OUT_DIR / f"{cand_path.stem}.gate.json" + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(serialized, encoding="utf-8") + if args.md: + md_path = Path(args.md) + md_path.parent.mkdir(parents=True, exist_ok=True) + md_path.write_text(render_md(report), encoding="utf-8") + if not args.quiet: + sys.stdout.write(serialized) + + print( + f"GATE: {report['classification']} " + f"(blocking={report['summary']['blocking']}, " + f"review={report['summary']['review']}, " + f"premium_pass_required={report['premium_pass_required']}) " + f"id={report['deterministic_gate_id']}", + file=sys.stderr, + ) + return 1 if report["classification"] == "BLOCK" else 0 + + +if __name__ == "__main__": + raise 
SystemExit(main()) diff --git a/tests/fixtures/supply_chain_candidate/build_request_clean.json b/tests/fixtures/supply_chain_candidate/build_request_clean.json new file mode 100644 index 0000000..5b8a497 --- /dev/null +++ b/tests/fixtures/supply_chain_candidate/build_request_clean.json @@ -0,0 +1,50 @@ +{ + "skill_id": "xklickd-research-reader", + "domain": "research", + "title": "Research Reader", + "size_tier": "lite", + "publisher": "internal", + "primary_domain_competencies": [ + "ESCO:research_methods", + "ESCO:academic_writing" + ], + "secondary_domain_competencies": [ + "ESCO:citation_management" + ], + "domain_risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + "domain_output_requirements": { + "format": "cited_summary" + }, + "risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + }, + "tools": { + "allowed": ["read_file"], + "forbidden": [] + }, + "memory": { + "writes_long_term": false, + "reads_private_context": false + }, + "output_contract": { + "emits_public_output": true, + "allowed_outputs": ["text_response"] + }, + "sources": [ + { + "id": "skos", + "name": "skos-framework", + "title": "SKOS Reference", + "license": "CC-BY-4.0", + "freshness": "current", + "url": "https://example.org/skos", + "published_at": "2025-01-01", + "usage": "reference", + "category": "academic" + } + ] +} diff --git a/tests/fixtures/supply_chain_candidate/build_request_missing_domain.json b/tests/fixtures/supply_chain_candidate/build_request_missing_domain.json new file mode 100644 index 0000000..f424329 --- /dev/null +++ b/tests/fixtures/supply_chain_candidate/build_request_missing_domain.json @@ -0,0 +1,10 @@ +{ + "skill_id": "xklickd-medicine-advisor", + "domain": "medicine", + "title": "Medicine Advisor", + "size_tier": "lite", + "risk_profile": { + "default_risk": "low", + "sensitive_actions": [] + } +} diff --git a/tests/fixtures/supply_chain_candidate/source_manifest_ok.json 
b/tests/fixtures/supply_chain_candidate/source_manifest_ok.json new file mode 100644 index 0000000..667b9ed --- /dev/null +++ b/tests/fixtures/supply_chain_candidate/source_manifest_ok.json @@ -0,0 +1,14 @@ +{ + "schema_version": "xklickd.source_manifest.v0.1", + "sources": [ + { + "id": "skos", + "title": "SKOS Reference", + "license": "CC-BY-4.0", + "usage": "reference", + "url": "https://example.org/skos", + "published_at": "2025-01-01", + "category": "academic" + } + ] +} diff --git a/tests/test_supply_chain_candidate.py b/tests/test_supply_chain_candidate.py new file mode 100644 index 0000000..5825666 --- /dev/null +++ b/tests/test_supply_chain_candidate.py @@ -0,0 +1,229 @@ +"""Tests for scripts/generate_supply_chain_candidate.py. + +Internal candidate generator (v4.2 target shape). NON-NORMATIVE. These tests +assert the anti-mirage contract: deterministic ids, the full v4.2 layer set, +foundation/transversal competency anchors, and that missing domain information +is surfaced as `requires_human_premium_pass` rather than hallucinated. 
+""" +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "scripts" / "generate_supply_chain_candidate.py" +FIX = REPO_ROOT / "tests" / "fixtures" / "supply_chain_candidate" + +V4_2_LAYERS = ( + "metadata", + "competency_architecture", + "memory_system", + "governance_system", + "memory_governance", + "runtime", + "context_graph", + "interactions", + "evidence", + "security", + "audit", + "skill_lifecycle", + "output_contract", +) + + +def _load(): + spec = importlib.util.spec_from_file_location( + "generate_supply_chain_candidate", SCRIPT + ) + assert spec and spec.loader, f"could not load {SCRIPT}" + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def _candidate(mod, name: str): + path = FIX / name + request, text = mod.load_build_request(path) + return mod.build_candidate(request, text, path) + + +# --- structural -------------------------------------------------------------- +def test_script_exists(): + assert SCRIPT.exists() + + +def test_candidate_has_all_v4_2_layers(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + for layer in V4_2_LAYERS: + assert layer in cand, f"missing v4.2 layer: {layer}" + + +def test_foundation_and_transversal_counts(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + core = cand["competency_architecture"]["competency_core"] + assert len(core["foundation_competencies"]) == 7 + assert len(core["transversal_competencies"]) == 12 + + +def test_harmonized_domain_names_present(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + arch = cand["competency_architecture"] + for name in ( + "competency_core", + "primary_domain_competencies", + "secondary_domain_competencies", + "domain_risk_profile", + "domain_output_requirements", + ): + assert name in arch, f"missing harmonized arch layer: {name}" + + +def 
test_skill_lifecycle_not_named_supply_chain(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + sl = cand["skill_lifecycle"] + assert sl.get("renamed_from") == "supply_chain" + assert sl.get("completeness_claimed") is False + # build_request stage exists but the lifecycle is not literally "supply_chain". + assert "build_request" in sl["stages"] + assert "promotion_gate" in sl["stages"] + + +def test_output_contract_graph_bindings(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + gb = cand["output_contract"]["graph_bindings"] + for field in ( + "creates_action_node", + "requires_policy_node", + "requires_evidence_node", + "may_trigger_veto_edge", + "writes_audit_edge", + ): + assert field in gb, f"missing graph_binding: {field}" + + +def test_interactions_canonical_flow(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + flow = cand["interactions"]["canonical_flow"] + assert flow[0] == "user_task" + assert "human_veto_if_required" in flow + assert flow[-1] == "memory_update_candidate" + + +# --- anti-mirage: no hallucination ------------------------------------------ +def test_missing_domain_info_triggers_premium_pass(): + mod = _load() + cand = _candidate(mod, "build_request_missing_domain.json") + status = cand["premium_pass_status"] + assert status["requires_human_premium_pass"] is True + # The specific missing-domain gaps must be named, not silently filled. + assert "competency_architecture.primary_domain_competencies" in status["gaps"] + assert "evidence.sources" in status["gaps"] + + +def test_missing_domain_does_not_hallucinate_competencies(): + mod = _load() + cand = _candidate(mod, "build_request_missing_domain.json") + primary = cand["competency_architecture"]["primary_domain_competencies"] + # Must be the gap marker, not an invented competency list. 
+ assert isinstance(primary, dict) + assert primary.get("requires_human_premium_pass") is True + # No sources were declared, so none may appear. + assert cand["sources"] == [] + assert cand["evidence"]["sources"] == [] + + +def test_clean_candidate_has_no_premium_pass_requirement(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + assert cand["premium_pass_status"]["requires_human_premium_pass"] is False + assert cand["premium_pass_status"]["gaps"] == [] + + +def test_sources_only_from_request(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + ids = {s.get("id") for s in cand["sources"]} + assert ids == {"skos"} # exactly what the request declared, nothing added + + +# --- governance floor -------------------------------------------------------- +def test_governance_floor_is_safe_by_default(): + mod = _load() + cand = _candidate(mod, "build_request_missing_domain.json") + gov = cand["governance_system"] + assert gov["human_veto"]["required"] is True + assert gov["human_veto"]["lowerable"] is False + assert gov["human_veto_required"] is True + assert gov["no_auto_external_action"] is True + assert gov["final_decision_owner"].startswith("human") + + +def test_threat_model_flat_mirrors_present(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + # The flat fields consumed by the threat-model tool must be present. 
+ for key in ("governance", "memory", "tools", "risk_profile", + "output_contract", "sources", "skill_id"): + assert key in cand, f"missing threat-model flat field: {key}" + + +# --- determinism ------------------------------------------------------------- +def test_candidate_id_deterministic_across_runs(): + mod = _load() + a = _candidate(mod, "build_request_clean.json") + b = _candidate(mod, "build_request_clean.json") + assert a["candidate_id"] == b["candidate_id"] + assert a["candidate_hash"] == b["candidate_hash"] + assert a["run_id"] == b["run_id"] + + +def test_candidate_id_changes_with_input(): + mod = _load() + a = _candidate(mod, "build_request_clean.json") + b = _candidate(mod, "build_request_missing_domain.json") + assert a["candidate_id"] != b["candidate_id"] + + +def test_no_clock_field_in_deterministic_core(): + mod = _load() + cand = _candidate(mod, "build_request_clean.json") + # The candidate carries no top-level generated_at; any clock value would + # have to live in a quarantined zone, never in the hashed core. 
+ assert "generated_at" not in cand + + +# --- CLI --------------------------------------------------------------------- +def test_cli_writes_candidate_and_exits_zero(tmp_path): + mod = _load() + out = tmp_path / "cand.json" + rc = mod.main([ + "--build-request", str(FIX / "build_request_clean.json"), + "--out", str(out), "--quiet", + ]) + assert rc == 0 + data = json.loads(out.read_text()) + assert data["skill_id"] == "xklickd-research-reader" + + +def test_cli_invalid_request_exits_one(tmp_path): + mod = _load() + bad = tmp_path / "bad.json" + bad.write_text(json.dumps({"domain": "x"})) # no skill_id + rc = mod.main(["--build-request", str(bad), "--quiet", + "--out", str(tmp_path / "o.json")]) + assert rc == 1 + + +def test_cli_missing_request_exits_two(tmp_path): + mod = _load() + rc = mod.main(["--build-request", str(tmp_path / "nope.json"), "--quiet", + "--out", str(tmp_path / "o.json")]) + # missing file is a build_request error -> exit 1 (cannot generate) + assert rc == 1 diff --git a/tests/test_supply_chain_promotion_gate.py b/tests/test_supply_chain_promotion_gate.py new file mode 100644 index 0000000..2122f14 --- /dev/null +++ b/tests/test_supply_chain_promotion_gate.py @@ -0,0 +1,267 @@ +"""Tests for scripts/run_supply_chain_promotion_gate.py. + +Combined promotion gate. NON-NORMATIVE. The gate orchestrates the existing +tool-backed checks, classifies ACCEPT / ACCEPT_WITH_REVIEW / BLOCK, and reports +(never runs) whether a human premium pass is required. A fixed --eval-date is +used so source freshness classification is reproducible. 
+""" +from __future__ import annotations + +import datetime as _dt +import importlib.util +import json +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +GATE_SCRIPT = REPO_ROOT / "scripts" / "run_supply_chain_promotion_gate.py" +GEN_SCRIPT = REPO_ROOT / "scripts" / "generate_supply_chain_candidate.py" +FIX = REPO_ROOT / "tests" / "fixtures" / "supply_chain_candidate" +EVAL_DATE = _dt.date(2026, 6, 2) + + +def _load(path: Path, name: str): + spec = importlib.util.spec_from_file_location(name, path) + assert spec and spec.loader, f"could not load {path}" + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def _gate_mod(): + return _load(GATE_SCRIPT, "run_supply_chain_promotion_gate") + + +def _gen_mod(): + return _load(GEN_SCRIPT, "generate_supply_chain_candidate") + + +def _make_candidate(tmp_path: Path, request_name: str) -> Path: + """Generate a real candidate from a fixture build_request.""" + gen = _gen_mod() + out = tmp_path / f"{request_name}.cand.json" + rc = gen.main([ + "--build-request", str(FIX / request_name), + "--out", str(out), "--quiet", + ]) + assert rc == 0 + return out + + +def _run(gate, candidate_path: Path, **kw): + candidate_text = candidate_path.read_text() + candidate = json.loads(candidate_text) + return gate.run_gate( + candidate, candidate_text, candidate_path, + kw.get("source_manifest"), kw.get("before"), EVAL_DATE, + ) + + +# --- structural -------------------------------------------------------------- +def test_script_exists(): + assert GATE_SCRIPT.exists() + + +def test_clean_candidate_accepts(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + rep = _run(gate, cand) + assert rep["classification"] == "ACCEPT" + assert rep["summary"]["blocking"] == 0 + assert rep["premium_pass_required"] is False + + +def test_report_has_required_fields(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, 
"build_request_clean.json") + rep = _run(gate, cand) + for field in ( + "schema_version", "classification", "deterministic_gate_id", + "summary", "checks", "blocking_findings", "review_findings", + "premium_pass_required", "claim_boundaries", "non_deterministic_zone", + ): + assert field in rep, f"missing gate field: {field}" + + +# --- premium pass / accept-with-review -------------------------------------- +def test_missing_domain_candidate_accepts_with_review(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_missing_domain.json") + rep = _run(gate, cand) + assert rep["classification"] == "ACCEPT_WITH_REVIEW" + assert rep["premium_pass_required"] is True + assert rep["summary"]["blocking"] == 0 + + +def test_gate_does_not_run_premium_pass(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_missing_domain.json") + rep = _run(gate, cand) + assert rep["claim_boundaries"]["runs_premium_pass"] is False + + +# --- block paths ------------------------------------------------------------- +def test_forbidden_claim_blocks(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["metadata"]["title"] = "the universal standard for everything" + leak = tmp_path / "claim.json" + leak.write_text(json.dumps(data)) + rep = _run(gate, leak) + assert rep["classification"] == "BLOCK" + assert any("forbidden public claim" in f for f in rep["blocking_findings"]) + + +def test_internal_codename_leak_blocks(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["metadata"]["note"] = "internal chimera reference" + leak = tmp_path / "codename.json" + leak.write_text(json.dumps(data)) + rep = _run(gate, leak) + assert rep["classification"] == "BLOCK" + assert any("codename" in f for f in rep["blocking_findings"]) + + +def test_private_public_leak_blocks(tmp_path): 
+ gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["private_public_leak"] = True + leak = tmp_path / "leak.json" + leak.write_text(json.dumps(data)) + rep = _run(gate, leak) + assert rep["classification"] == "BLOCK" + assert any("private->public" in f for f in rep["blocking_findings"]) + + +def test_public_v4_2_overclaim_blocks(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["internal_target"]["public_version"] = "v4.2" + leak = tmp_path / "v42.json" + leak.write_text(json.dumps(data)) + rep = _run(gate, leak) + assert rep["classification"] == "BLOCK" + assert any("public_version v4.2" in f for f in rep["blocking_findings"]) + + +def test_missing_v4_2_layer_blocks(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + del data["governance_system"] + bad = tmp_path / "nolayer.json" + bad.write_text(json.dumps(data)) + rep = _run(gate, bad) + assert rep["classification"] == "BLOCK" + assert any("governance_system" in f for f in rep["blocking_findings"]) + + +def test_completeness_claim_blocks(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["skill_lifecycle"]["completeness_claimed"] = True + bad = tmp_path / "complete.json" + bad.write_text(json.dumps(data)) + rep = _run(gate, bad) + assert rep["classification"] == "BLOCK" + assert any("completeness" in f for f in rep["blocking_findings"]) + + +# --- orchestrated checks run honestly --------------------------------------- +def test_not_run_checks_recorded_without_inflating_review(tmp_path): + """A skipped check is recorded as not_run with a reason, not as a review + finding — otherwise a clean candidate would never be a clean ACCEPT.""" + gate = _gate_mod() + cand = 
_make_candidate(tmp_path, "build_request_clean.json") + rep = _run(gate, cand) # no source manifest, no before -> 2 not_run checks + not_run = [c for c in rep["checks"] if c["verdict"] == "not_run"] + assert len(not_run) >= 2 + for c in not_run: + assert c["review_findings"] == [] + assert "reason" in c["detail"] + assert rep["classification"] == "ACCEPT" + + +def test_source_manifest_check_runs_when_provided(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + rep = _run(gate, cand, + source_manifest=FIX / "source_manifest_ok.json") + sc = next(c for c in rep["checks"] if c["check"] == "source_license") + assert sc["verdict"] in ("pass", "review") # it ran + assert rep["classification"] in ("ACCEPT", "ACCEPT_WITH_REVIEW") + + +def test_threat_model_check_runs(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + rep = _run(gate, cand) + tm = next(c for c in rep["checks"] if c["check"] == "threat_model") + assert tm["verdict"] != "not_run" + assert tm["detail"].get("deterministic_threat_model_id") + + +# --- determinism ------------------------------------------------------------- +def test_gate_id_stable_across_runs(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + a = _run(gate, cand) + b = _run(gate, cand) + assert a["deterministic_gate_id"] == b["deterministic_gate_id"] + + +def test_gate_id_excludes_eval_date(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + text = cand.read_text() + data = json.loads(text) + a = gate.run_gate(data, text, cand, None, None, _dt.date(2026, 6, 2)) + b = gate.run_gate(data, text, cand, None, None, _dt.date(2030, 1, 1)) + assert a["deterministic_gate_id"] == b["deterministic_gate_id"] + + +# --- CLI exit codes ---------------------------------------------------------- +def test_cli_accept_exits_zero(tmp_path): + gate = _gate_mod() + cand = 
_make_candidate(tmp_path, "build_request_clean.json") + out = tmp_path / "gate.json" + rc = gate.main(["--candidate", str(cand), "--out", str(out), + "--quiet", "--eval-date", "2026-06-02"]) + assert rc == 0 + assert json.loads(out.read_text())["classification"] == "ACCEPT" + + +def test_cli_block_exits_one(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + data = json.loads(cand.read_text()) + data["private_public_leak"] = True + bad = tmp_path / "leak.json" + bad.write_text(json.dumps(data)) + out = tmp_path / "gate.json" + rc = gate.main(["--candidate", str(bad), "--out", str(out), + "--quiet", "--eval-date", "2026-06-02"]) + assert rc == 1 + + +def test_cli_missing_candidate_exits_two(tmp_path): + gate = _gate_mod() + rc = gate.main(["--candidate", str(tmp_path / "nope.json"), "--quiet"]) + assert rc == 2 + + +def test_cli_writes_md_summary(tmp_path): + gate = _gate_mod() + cand = _make_candidate(tmp_path, "build_request_clean.json") + out = tmp_path / "gate.json" + md = tmp_path / "gate.md" + rc = gate.main(["--candidate", str(cand), "--out", str(out), + "--md", str(md), "--quiet", "--eval-date", "2026-06-02"]) + assert rc == 0 + assert md.exists() + assert "promotion gate" in md.read_text().lower()