diff --git a/.gitattributes b/.gitattributes index fa0b9379..f916f24a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ tests/fixtures/fast_rerun_*/** -text experiments/080/cases/*/seed_archive/** -text +src/multi_agent_brief/evaluation_cases/fixtures/cases/*/workspace/** -text diff --git a/CHANGELOG.md b/CHANGELOG.md index 8648bac4..c0a7a32c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **Product OS reader-quality reference package**: the packaged + `same_evidence_reader_quality_regression` eval case now generates + Quality Panel JSON/summary/HTML plus clean delivery/audit bundle archives, + and the docs include a public-safe v0.11.3 reference note. This is an + inspectable reference-package regression signal only; it does not claim + output-quality improvement, semantic proof, delivery approval, or release + authorization. - **Same-evidence reader-quality regression pack**: packaged public-safe evaluation cases now include a synthetic `same_evidence_reader_quality_regression` workspace that holds evidence diff --git a/README.md b/README.md index 747f1a93..35b1be54 100644 --- a/README.md +++ b/README.md @@ -363,6 +363,7 @@ The goal is not to remove human judgment. 
The goal is to let humans spend more t - [Architecture Status](docs/architecture-status.md) - [Roadmap](docs/roadmap.md) - [Red lines and anti-patterns](docs/red-lines-and-anti-patterns.md) +- [Product OS reader-quality reference package](docs/reference-runs/v0.11.3-product-os-reader-quality-reference.md) - [Synthetic regression pack](docs/reference-runs/v0.11.1-synthetic-regression-pack.md) - [Public solar integration run](docs/reference-runs/v0.7.2-public-solar-integration.zh-CN.md) - [Organoid-industry failure study](docs/reference-runs/v0.7.4-organoid-failure-study.zh-CN.md) diff --git a/README.zh-CN.md b/README.zh-CN.md index f45258a0..b19f721b 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -349,6 +349,7 @@ BriefLoop 想把软件工程里的那套“可追踪、可回滚、可审计、 - [架构状态](docs/architecture-status.zh-CN.md) - [路线图](docs/roadmap.zh-CN.md) - [红线与反模式](docs/red-lines-and-anti-patterns.md) +- [Product OS 读者质量 reference package](docs/reference-runs/v0.11.3-product-os-reader-quality-reference.md) - [合成回归包](docs/reference-runs/v0.11.1-synthetic-regression-pack.md) - [公开运行摘要](docs/reference-runs/v0.7.2-public-solar-integration.zh-CN.md) - [失败研究](docs/reference-runs/v0.7.4-organoid-failure-study.zh-CN.md) diff --git a/docs/README.md b/docs/README.md index e930c6b4..083ad4fc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -46,6 +46,7 @@ tree is fully bilingual. 
- [Runtime recipes](runtime-recipes.md) - [Support matrix](support-matrix.md) - [Red lines and anti-patterns](red-lines-and-anti-patterns.md) +- [Pre-release v0.11.3 Product OS reader-quality reference package](reference-runs/v0.11.3-product-os-reader-quality-reference.md) - [Pre-release v0.11.1 synthetic regression pack](reference-runs/v0.11.1-synthetic-regression-pack.md) - [BriefLoop-090 A-controlled auditable-brief pilot](reference-runs/briefloop-090-a-controlled-pilot.md) - [Security](security.md) diff --git a/docs/reference-runs/v0.11.3-product-os-reader-quality-reference.md b/docs/reference-runs/v0.11.3-product-os-reader-quality-reference.md new file mode 100644 index 00000000..df9056d8 --- /dev/null +++ b/docs/reference-runs/v0.11.3-product-os-reader-quality-reference.md @@ -0,0 +1,101 @@ +# Pre-Release v0.11.3 Product OS Reader-Quality Reference Package + +This is a public-safe reference note for the pre-release v0.11.3 reference-run +refresh line. It documents one inspectable BriefLoop-era Product OS package +using the packaged synthetic `same_evidence_reader_quality_regression` case. + +It is not proof of output quality, semantic correctness, source-support +sufficiency, management readiness, delivery approval, legal approval, or public +release authorization. + +## Scope + +- Product entry shape: `industry-weekly` / `market_weekly` workspace semantics. +- Case type: packaged public-safe synthetic workspace. +- Reader artifact: `output/delivery/brief.md`. +- Audit/control artifacts: Claim Ledger, Atomic Claim Graph, Evidence Span + Registry, audit report, scoped gate reports, source appendix trace, Quality + Panel JSON, Quality Summary, Quality Panel HTML, and clean bundle manifest. +- Bundle surfaces: `output/delivery_bundle.zip` contains reader-facing delivery + artifacts only; `output/audit_bundle.zip` contains audit/control artifacts. 
+- Same-evidence boundary: the fixture holds source/evidence inputs fixed while + surfacing reader-quality diagnostics. + +The case does not call an LLM, fetch live sources, parse binary documents, +execute specialist agents, or approve delivery. + +## What The Package Shows + +- Reader-facing delivery can stay free of internal `[src:...]` claim markers. +- The audit bundle keeps the trace surfaces that the reader bundle omits. +- The Quality Panel can surface existing Product OS diagnostics together: + materiality/focus selection, reader template conformance, support-calibrated + wording, trajectory/guidance projections, and closeout guidance. +- Warning and incomplete surfaces remain visible instead of being narrated away. +- Clean delivery/audit archives can be generated from the deterministic bundle + manifest. + +## Reproduce + +From a source checkout or installed package: + +```bash +multi-agent-brief eval-cases validate +multi-agent-brief eval-cases run --case-id same_evidence_reader_quality_regression --json +``` + +The packaged case executes this deterministic sequence: + +```text +state.check +status.show +quality.summarize +packs.bundle +state.check +status.show +``` + +Expected generated artifacts inside the temporary workspace include: + +```text +output/intermediate/quality_panel.json +output/intermediate/quality_summary.md +output/intermediate/quality_panel.html +output/report_bundle_manifest.json +output/delivery_bundle.zip +output/audit_bundle.zip +``` + +The fixture itself is tracked under: + +```text +src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/ +``` + +## Known Warnings + +The reference case intentionally keeps several non-authoritative diagnostics +visible: + +- source evidence pack materialization is missing; +- materiality/focus exclusions require human review; +- reader template conformance has warning-only findings; +- support-calibrated wording has warning-only findings. 
+ +These warnings are part of the reference package. They should not be hidden or +rephrased as a pass. + +## What This Does Not Show + +- It does not prove that BriefLoop writes a better brief. +- It does not prove that a source semantically supports a claim. +- It does not prove that reader-quality warnings correlate with human + preference. +- It does not prove that a report is management-ready, disclosure-ready, or + externally publishable. +- It does not use private customer, employer, investor-relations, or commercial + source material. + +Passing the case means the deterministic reference package and bundle surfaces +remained inspectable. It is a reference-package regression signal only. + diff --git a/src/multi_agent_brief/evaluation_cases/contract.py b/src/multi_agent_brief/evaluation_cases/contract.py index 3e7b0533..cdec36ee 100644 --- a/src/multi_agent_brief/evaluation_cases/contract.py +++ b/src/multi_agent_brief/evaluation_cases/contract.py @@ -30,6 +30,7 @@ "gates.show", "gates.validate", "guidance_manifestation.seed_report", + "packs.bundle", "provenance.build", "provenance.show", "provenance.validate", diff --git a/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/intermediate/finalize_report.json b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/intermediate/finalize_report.json index ae1151ef..10156dd1 100644 --- a/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/intermediate/finalize_report.json +++ b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/intermediate/finalize_report.json @@ -1,5 +1,8 @@ { "audit_binding": { + "audit_report_sha256": "aac0fcc4b91db3bc6d18970a06e5c9491c1d11d07fd6f76e80f7e1258c8bd701", + "audited_brief_sha256": "72826cb1e7ed20aa7b1b94af24af2d0d4cce2d3e57d22f801055d28ee9ef94ce", 
+ "claim_ledger_sha256": "0f2407d58506b314d7c899fa77e692c6c3d01838a118b60ba6016dd917b7dc06", "findings": [], "status": "pass" }, @@ -16,6 +19,9 @@ "delivery_artifacts": [ "output/delivery/brief.md" ], + "delivery_artifact_sha256": { + "output/delivery/brief.md": "aec52d7b508b852dda9de6586974ef47ddb9d0162925e8c86f37f3bcd5286328" + }, "duplicate_citation_count": 0, "quality_panel_closeout": { "status": "recommended" diff --git a/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix.md b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix.md new file mode 100644 index 00000000..635fc076 --- /dev/null +++ b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix.md @@ -0,0 +1,12 @@ +# Source Appendix + +## Sources + +### [S1] Synthetic market report excerpt + +- Type: local_file +- Category: market_report +- Retrieval source type: local_file +- Underlying evidence type: media_report +- Note: Public-safe synthetic source used only for deterministic reference-run fixture coverage. + diff --git a/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix_trace.md b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix_trace.md new file mode 100644 index 00000000..b835d34f --- /dev/null +++ b/src/multi_agent_brief/evaluation_cases/fixtures/cases/same_evidence_reader_quality_regression/workspace/output/source_appendix_trace.md @@ -0,0 +1,10 @@ +# Source Appendix Trace + +This audit trace maps reader-facing source labels back to synthetic fixture +source records. It is public-safe and deterministic; it does not prove semantic +source support. 
+ +| Reader label | Source ID | Claim ID | Source title | +|---|---|---|---| +| S1 | SYN_SRC_001 | SYN_CLAIM_001 | Synthetic market report excerpt | + diff --git a/src/multi_agent_brief/evaluation_cases/fixtures/manifest.yaml b/src/multi_agent_brief/evaluation_cases/fixtures/manifest.yaml index b138088c..ce53c33e 100644 --- a/src/multi_agent_brief/evaluation_cases/fixtures/manifest.yaml +++ b/src/multi_agent_brief/evaluation_cases/fixtures/manifest.yaml @@ -373,6 +373,8 @@ cases: args: {} - action: quality.summarize args: {} + - action: packs.bundle + args: {} - action: state.check args: {} - action: status.show @@ -390,6 +392,14 @@ cases: quality_panel: output/intermediate/quality_panel.json quality_summary: output/intermediate/quality_summary.md quality_panel_html: output/intermediate/quality_panel.html + - action: packs.bundle + exit_code: 0 + result_contains: + report_bundle_manifest: output/report_bundle_manifest.json + delivery_bundle_archive: output/delivery_bundle.zip + audit_bundle_archive: output/audit_bundle.zip + delivery_artifact_count: 1 + packaging_hygiene: clean - action: state.check exit_code: 0 - action: status.show @@ -398,6 +408,9 @@ cases: - output/intermediate/quality_panel.json - output/intermediate/quality_summary.md - output/intermediate/quality_panel.html + - output/report_bundle_manifest.json + - output/delivery_bundle.zip + - output/audit_bundle.zip artifact_statuses: - artifact_id: quality_panel status: valid @@ -433,6 +446,12 @@ cases: - scope: workspace file: output/intermediate/quality_panel.html text: "Quality Panel" + - scope: workspace + file: output/report_bundle_manifest.json + text: "reader_facing_artifacts_only" + - scope: workspace + file: output/report_bundle_manifest.json + text: "audit_control_artifacts_only_not_reader_delivery" absent_text: - scope: workspace file: output/delivery/brief.md diff --git a/src/multi_agent_brief/evaluation_cases/runner.py b/src/multi_agent_brief/evaluation_cases/runner.py index 
def _dict_or_empty(value: Any) -> dict[str, Any]:
    """Return ``value`` unchanged when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _write_bundle_projection_artifacts(*, workspace: Path) -> dict[str, Any]:
    """Run the ``packs.bundle`` eval action and summarize the resulting manifest.

    Calls ``write_report_bundle_manifest`` with ``write_archives=True`` on the
    user-expanded, resolved workspace path, then flattens selected manifest
    fields into an action-result dict.

    Args:
        workspace: Workspace directory to bundle.

    Returns:
        A dict with ``ok`` set to ``True``, the manifest path, the delivery and
        audit archive paths, the delivery and audit artifact counts, the
        packaging hygiene status, and a fixed ``boundary`` marker. Any field
        whose manifest section is missing or is not a dict is ``None``.
    """
    ws = workspace.expanduser().resolve()
    # NOTE(review): presumably this writes the manifest and both zip archives
    # into the workspace -- confirm against product.bundle_projection.
    manifest = write_report_bundle_manifest(workspace=ws, write_archives=True)

    # Every nested section gets the same dict-or-empty guard, so a malformed
    # section yields None values instead of an AttributeError on ``.get``.
    archives = _dict_or_empty(manifest.get("bundle_archives"))
    delivery_archive = _dict_or_empty(archives.get("delivery"))
    audit_archive = _dict_or_empty(archives.get("audit"))
    delivery_bundle = _dict_or_empty(manifest.get("delivery_bundle"))
    audit_bundle = _dict_or_empty(manifest.get("audit_bundle"))
    hygiene = _dict_or_empty(manifest.get("packaging_hygiene"))

    return {
        "ok": True,
        "report_bundle_manifest": manifest.get("manifest_path"),
        "delivery_bundle_archive": delivery_archive.get("path"),
        "audit_bundle_archive": audit_archive.get("path"),
        "delivery_artifact_count": delivery_bundle.get("artifact_count"),
        "audit_artifact_count": audit_bundle.get("artifact_count"),
        "packaging_hygiene": hygiene.get("status"),
        "boundary": "bundle_projection_only_not_delivery_or_release_authority",
    }
bundle_action["delivery_bundle_archive"] == "output/delivery_bundle.zip" + assert bundle_action["audit_bundle_archive"] == "output/audit_bundle.zip" def test_eval_cases_improvement_approved_case_materializes_snapshot(capsys):