diff --git a/organism-strain-boundary-guard/README.md b/organism-strain-boundary-guard/README.md new file mode 100644 index 00000000..db40d3a0 --- /dev/null +++ b/organism-strain-boundary-guard/README.md @@ -0,0 +1,57 @@ +# Organism Strain Boundary Guard + +This module adds a focused organism, strain, cell-line, and host-species +boundary guard for Scientific Knowledge Graph Integration. It protects entity +pages, graph navigation, and AI recommendation paths from unsafe biological +node relationships before they are published. + +## Scope + +The guard validates synthetic knowledge graph packets for: + +- sample-to-organism taxon consistency +- strain-to-parent-species boundaries +- pathogen-to-host role direction and supported host taxon evidence +- cell-line species provenance and culture collection evidence +- mixed-organism sample edges that need contamination or co-culture review +- recommendation paths that depend on suppressed or review-only biological + graph edges + +It emits deterministic edge decisions, recommendation decisions, curator +actions, JSON-LD review packets, Markdown reports, SVG summaries, and a short +H.264 demo video. No credentials, live APIs, private data, or external services +are used. + +## Issue #17 Requirement Mapping + +- Entity extraction and linked data: consumes typed organism, strain, + cell-line, pathogen, sample, and dataset graph nodes and exports a JSON-LD + review packet. +- Knowledge navigation: prevents graph search paths from crossing unsupported + organism or strain boundaries. +- AI research recommendations: suppresses recommendations that rely on + taxon-mismatched, role-inverted, or unresolved mixed-organism graph paths. +- Entity pages and curator workflows: emits per-edge evidence reasons and + remediation actions for graph stewards. 
+ +## Non-Overlap + +This is not a biological accession crosswalk, geospatial sample provenance +guard, measurement harmonization module, temporal-validity guard, evidence +freshness guard, multilingual alias guard, ontology drift migration, or broad +graph ingestion module. It focuses specifically on biological boundary safety +for organism, strain, host, cell-line, and sample graph edges. + +## Validation + +```bash +node organism-strain-boundary-guard/test.js +node organism-strain-boundary-guard/demo.js +node organism-strain-boundary-guard/render-video.js +node --check organism-strain-boundary-guard/index.js +node --check organism-strain-boundary-guard/sample-data.js +node --check organism-strain-boundary-guard/test.js +node --check organism-strain-boundary-guard/demo.js +node --check organism-strain-boundary-guard/render-video.js +git diff --check +``` diff --git a/organism-strain-boundary-guard/demo.js b/organism-strain-boundary-guard/demo.js new file mode 100644 index 00000000..94c79bd8 --- /dev/null +++ b/organism-strain-boundary-guard/demo.js @@ -0,0 +1,27 @@ +const fs = require("fs"); +const path = require("path"); +const graph = require("./sample-data"); +const { + inspectOrganismBoundaryGraph, + renderMarkdownReport, + renderSvgReport, +} = require("./index"); + +const reportDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportDir, { recursive: true }); + +const report = inspectOrganismBoundaryGraph(graph); +const jsonPath = path.join(reportDir, "organism-boundary-review.json"); +const markdownPath = path.join(reportDir, "organism-boundary-review.md"); +const svgPath = path.join(reportDir, "organism-boundary-review.svg"); + +fs.writeFileSync(jsonPath, `${JSON.stringify(report, null, 2)}\n`); +fs.writeFileSync(markdownPath, renderMarkdownReport(report)); +fs.writeFileSync(svgPath, renderSvgReport(report)); + +console.log(`wrote ${path.relative(process.cwd(), jsonPath)}`); +console.log(`wrote ${path.relative(process.cwd(), markdownPath)}`); 
/**
 * Organism / strain / host / cell-line boundary guard.
 *
 * Evaluates typed knowledge-graph edges against the entities and evidence
 * records they reference, derives recommendation decisions from the edge
 * decisions, and renders the result as JSON-LD, Markdown and SVG.
 * Every decision object carries a SHA-256 digest of its own content.
 */

/**
 * Index records by their `id`.
 *
 * @param {Array<object>|undefined} items - Records carrying an `id` field.
 * @returns {Map<*, object>} Lookup from id to record; empty for non-arrays.
 */
function buildLookup(items) {
  return new Map(normalizeList(items).map((item) => [item.id, item]));
}

/**
 * Build one blocker/warning entry.
 *
 * @param {string} code - Stable machine-readable reason code.
 * @param {string} message - Human-readable explanation.
 * @param {object} evidence - Values that triggered the reason.
 * @returns {{code: string, message: string, evidence: object}}
 */
function reason(code, message, evidence) {
  return { code, message, evidence };
}

/**
 * Coerce a value to an array without falsy entries.
 *
 * @param {*} values - Candidate list.
 * @returns {Array} The truthy entries, or `[]` when `values` is not an array.
 */
function normalizeList(values) {
  return Array.isArray(values) ? values.filter(Boolean) : [];
}

/**
 * Resolve the evidence records referenced by an edge.
 * Identifiers that do not resolve are dropped here; `evaluateEdge` reports
 * them separately as `unknown_evidence` blockers.
 *
 * @param {object} edge - Edge with an optional `evidenceIds` array.
 * @param {Map<*, object>} evidenceLookup - Evidence indexed by id.
 * @returns {Array<object>} Resolved evidence records.
 */
function evidenceItems(edge, evidenceLookup) {
  return normalizeList(edge.evidenceIds)
    .map((id) => evidenceLookup.get(id))
    .filter(Boolean);
}

/** True when `value` is neither `undefined` nor `null`. */
function isPresent(value) {
  return value !== undefined && value !== null;
}

/**
 * True when some evidence record has `record[key] === value`.
 * A missing expected value never matches: otherwise `undefined === undefined`
 * would let any record lacking `key` count as supporting evidence.
 */
function evidenceHasValue(items, key, value) {
  return isPresent(value) && items.some((item) => Boolean(item) && item[key] === value);
}

/**
 * True when some evidence record lists `value` inside its `record[key]` array.
 * A missing expected value never matches.
 */
function evidenceListContains(items, key, value) {
  return (
    isPresent(value) &&
    items.some((item) => Boolean(item) && normalizeList(item[key]).includes(value))
  );
}

/**
 * Fail-closed taxon comparison: two identifiers match only when the left one
 * is present and both are strictly equal, so two missing ids do not "match".
 */
function taxonIdsMatch(left, right) {
  return isPresent(left) && left === right;
}

/**
 * Check the node types at both ends of an edge.
 *
 * @returns {object|null} A blocker reason when the roles are wrong, else null.
 */
function roleBlocker(source, target, sourceType, targetType, code, message) {
  if (source.type === sourceType && target.type === targetType) {
    return null;
  }
  return reason(code, message, { sourceType: source.type, targetType: target.type });
}

/**
 * `sample_of_organism`: the sample taxon must equal the organism taxon, and the
 * edge needs a voucher id plus evidence naming the organism taxon.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluateSampleOrganism(edge, source, target, evidence) {
  const wrongRoles = roleBlocker(
    source,
    target,
    "sample",
    "organism",
    "invalid_sample_organism_roles",
    "sample_of_organism requires sample to organism"
  );
  if (wrongRoles) {
    return { blockers: [wrongRoles], warnings: [] };
  }

  const blockers = [];
  const warnings = [];

  if (!taxonIdsMatch(source.taxonId, target.taxonId)) {
    blockers.push(
      reason("sample_taxon_mismatch", "sample taxon does not match organism node", {
        sampleTaxonId: source.taxonId,
        organismTaxonId: target.taxonId,
      })
    );
  }

  if (!source.voucherId || !evidenceHasValue(evidence, "taxonId", target.taxonId)) {
    warnings.push(
      reason("voucher_evidence_incomplete", "sample organism edge needs voucher evidence", {
        voucherId: source.voucherId || "missing",
        targetTaxonId: target.taxonId,
      })
    );
  }

  return { blockers, warnings };
}

/**
 * `strain_of_species`: the strain's parent taxon must equal the organism taxon,
 * and some evidence record must name that parent taxon.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluateStrainSpecies(edge, source, target, evidence) {
  const wrongRoles = roleBlocker(
    source,
    target,
    "strain",
    "organism",
    "invalid_strain_species_roles",
    "strain_of_species requires strain to organism"
  );
  if (wrongRoles) {
    return { blockers: [wrongRoles], warnings: [] };
  }

  const blockers = [];
  const warnings = [];

  if (!taxonIdsMatch(source.parentTaxonId, target.taxonId)) {
    blockers.push(
      reason("strain_parent_taxon_mismatch", "strain parent taxon does not match organism node", {
        strainParentTaxonId: source.parentTaxonId,
        organismTaxonId: target.taxonId,
      })
    );
  }

  if (!evidenceHasValue(evidence, "parentTaxonId", source.parentTaxonId)) {
    warnings.push(
      reason("strain_registry_evidence_missing", "strain parent evidence is missing", {
        strainId: source.id,
      })
    );
  }

  return { blockers, warnings };
}

/**
 * `pathogen_in_host`: the host taxon must be listed in the pathogen's
 * `supportedHostTaxonIds`, and some evidence record must list it as well.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluatePathogenHost(edge, source, target, evidence) {
  const wrongRoles = roleBlocker(
    source,
    target,
    "pathogen",
    "organism",
    "invalid_pathogen_host_roles",
    "pathogen_in_host requires pathogen to host organism"
  );
  if (wrongRoles) {
    return { blockers: [wrongRoles], warnings: [] };
  }

  const blockers = [];
  const warnings = [];
  const supportedHosts = normalizeList(source.supportedHostTaxonIds);

  if (!supportedHosts.includes(target.taxonId)) {
    blockers.push(
      reason("host_taxon_not_supported", "host organism is not supported by pathogen host evidence", {
        pathogenId: source.id,
        targetTaxonId: target.taxonId,
        supportedHostTaxonIds: supportedHosts,
      })
    );
  }

  if (!evidenceListContains(evidence, "supportedHostTaxonIds", target.taxonId)) {
    warnings.push(
      reason("host_range_evidence_missing", "host edge lacks matching host-range evidence", {
        targetTaxonId: target.taxonId,
      })
    );
  }

  return { blockers, warnings };
}

/**
 * `cell_line_from_species`: the cell line's species taxon must equal the
 * organism taxon, and the culture collection id needs matching evidence.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluateCellLineSpecies(edge, source, target, evidence) {
  const wrongRoles = roleBlocker(
    source,
    target,
    "cell_line",
    "organism",
    "invalid_cell_line_species_roles",
    "cell_line_from_species requires cell line to organism"
  );
  if (wrongRoles) {
    return { blockers: [wrongRoles], warnings: [] };
  }

  const blockers = [];
  const warnings = [];

  if (!taxonIdsMatch(source.speciesTaxonId, target.taxonId)) {
    blockers.push(
      reason("cell_line_species_mismatch", "cell-line species does not match organism node", {
        speciesTaxonId: source.speciesTaxonId,
        organismTaxonId: target.taxonId,
      })
    );
  }

  if (
    !source.cultureCollectionId ||
    !evidenceHasValue(evidence, "cultureCollectionId", source.cultureCollectionId)
  ) {
    warnings.push(
      reason("cell_line_registry_missing", "cell-line edge needs culture collection evidence", {
        cultureCollectionId: source.cultureCollectionId || "missing",
      })
    );
  }

  return { blockers, warnings };
}

/**
 * `dataset_contains_sample`: the dataset needs DOI evidence, and a sample with
 * more than one observed taxon needs a contamination / co-culture review flag.
 * This check only ever produces warnings once the roles are valid.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluateDatasetSample(edge, source, target, evidence) {
  const wrongRoles = roleBlocker(
    source,
    target,
    "dataset",
    "sample",
    "invalid_dataset_sample_roles",
    "dataset_contains_sample requires dataset to sample"
  );
  if (wrongRoles) {
    return { blockers: [wrongRoles], warnings: [] };
  }

  const warnings = [];

  if (!source.doi || !evidenceHasValue(evidence, "doi", source.doi)) {
    warnings.push(
      reason("dataset_doi_evidence_missing", "dataset sample edge needs DOI evidence", {
        doi: source.doi || "missing",
      })
    );
  }

  const observedTaxa = normalizeList(target.observedTaxonIds);
  if (observedTaxa.length > 1 && !target.contaminationReview) {
    warnings.push(
      reason("mixed_organism_review_missing", "mixed-organism sample needs contamination or co-culture review", {
        sampleId: target.id,
        observedTaxonIds: observedTaxa,
      })
    );
  }

  return { blockers: [], warnings };
}

// A Map (not an object literal) so edge types such as "constructor" cannot
// resolve to inherited properties.
const EDGE_EVALUATORS = new Map([
  ["sample_of_organism", evaluateSampleOrganism],
  ["strain_of_species", evaluateStrainSpecies],
  ["pathogen_in_host", evaluatePathogenHost],
  ["cell_line_from_species", evaluateCellLineSpecies],
  ["dataset_contains_sample", evaluateDatasetSample],
]);

/**
 * Dispatch an edge to the evaluator for its `type`.
 * Unknown types yield a single `unhandled_edge_type` warning.
 *
 * @returns {{blockers: Array<object>, warnings: Array<object>}}
 */
function evaluateTypedEdge(edge, source, target, evidence) {
  const evaluator = EDGE_EVALUATORS.get(edge.type);
  if (evaluator) {
    return evaluator(edge, source, target, evidence);
  }
  return {
    blockers: [],
    warnings: [
      reason("unhandled_edge_type", "edge type is not handled by this biological boundary guard", {
        edgeType: edge.type,
      }),
    ],
  };
}

/**
 * Derive curator actions from an edge decision and its reason codes.
 *
 * @param {string} decision - "allow", "curator_review" or "suppress".
 * @param {Array<object>} blockers - Blocking reasons.
 * @param {Array<object>} warnings - Warning reasons.
 * @returns {Array<string>} De-duplicated actions in rule order.
 */
function actionsFor(decision, blockers, warnings) {
  if (decision === "allow") {
    return ["publish biological graph edge", "allow recommendation paths to use this edge"];
  }

  const codes = new Set([...blockers, ...warnings].map((item) => item.code));
  const actions = [];

  if (codes.has("strain_parent_taxon_mismatch")) {
    actions.push("suppress strain edge until parent organism is corrected");
    actions.push("request strain registry evidence from curator");
  }
  if (codes.has("host_taxon_not_supported")) {
    actions.push("suppress host recommendation path");
    actions.push("request host range evidence or mark as experimental model");
  }
  if (codes.has("mixed_organism_review_missing")) {
    actions.push("show dataset path only with contamination review notice");
    actions.push("request co-culture or contamination review evidence");
  }
  if (codes.has("sample_taxon_mismatch") || codes.has("cell_line_species_mismatch")) {
    actions.push("quarantine biological entity page edge");
  }
  // Any other reason code falls back to generic curator routing.
  if (actions.length === 0) {
    actions.push("route edge to graph curator review");
  }

  return [...new Set(actions)];
}

/**
 * Evaluate one graph edge.
 *
 * @param {object} edge - `{ id, type, source, target, evidenceIds }`.
 * @param {{entities: Map, evidence: Map}} context - Lookups built by `buildLookup`.
 * @returns {object} Decision record: any blocker gives "suppress", otherwise
 *   any warning gives "curator_review", otherwise "allow"; includes `auditDigest`.
 */
function evaluateEdge(edge, context) {
  const blockers = [];
  const warnings = [];
  const source = context.entities.get(edge.source);
  const target = context.entities.get(edge.target);
  const evidenceIds = normalizeList(edge.evidenceIds);

  if (!source) {
    blockers.push(reason("missing_source", "edge source node is missing", { source: edge.source }));
  }
  if (!target) {
    blockers.push(reason("missing_target", "edge target node is missing", { target: edge.target }));
  }
  if (evidenceIds.length === 0) {
    warnings.push(reason("missing_evidence_ids", "edge has no evidence identifiers", { edgeId: edge.id }));
  }

  for (const id of evidenceIds) {
    if (!context.evidence.has(id)) {
      blockers.push(reason("unknown_evidence", "edge references unknown evidence", { evidenceId: id }));
    }
  }

  // Type-specific checks need both endpoints.
  if (source && target) {
    const typed = evaluateTypedEdge(edge, source, target, evidenceItems(edge, context.evidence));
    blockers.push(...typed.blockers);
    warnings.push(...typed.warnings);
  }

  let decision = "allow";
  if (blockers.length > 0) {
    decision = "suppress";
  } else if (warnings.length > 0) {
    decision = "curator_review";
  }

  // Key order is part of the digest input; keep it stable.
  const result = {
    edgeId: edge.id,
    type: edge.type,
    source: edge.source,
    target: edge.target,
    decision,
    blockers,
    warnings,
    actions: actionsFor(decision, blockers, warnings),
  };

  return { ...result, auditDigest: digest(result) };
}

/**
 * Evaluate one recommendation against the decisions of the edges on its path.
 *
 * @param {object} recommendation - `{ id, label, pathEdgeIds }`.
 * @param {Map<*, object>} edgeLookup - Edge decisions indexed by edge id.
 * @returns {object} Decision record: "suppress" when an edge is missing or
 *   suppressed, "show_with_notice" when an edge needs curator review,
 *   otherwise "show"; includes `auditDigest`.
 */
function evaluateRecommendation(recommendation, edgeLookup) {
  const blockers = [];
  const warnings = [];
  const path = [];

  for (const edgeId of normalizeList(recommendation.pathEdgeIds)) {
    const edgeDecision = edgeLookup.get(edgeId);
    if (!edgeDecision) {
      blockers.push(reason("missing_path_edge", "recommendation references missing edge", { edgeId }));
      continue;
    }

    path.push({
      edgeId,
      decision: edgeDecision.decision,
      auditDigest: edgeDecision.auditDigest,
    });

    if (edgeDecision.decision === "suppress") {
      blockers.push(
        reason("path_contains_suppressed_edge", "recommendation uses a suppressed biological edge", {
          edgeId,
        })
      );
    } else if (edgeDecision.decision === "curator_review") {
      warnings.push(
        reason("path_contains_review_edge", "recommendation uses an edge requiring curator review", {
          edgeId,
        })
      );
    }
  }

  let decision = "show";
  if (blockers.length > 0) {
    decision = "suppress";
  } else if (warnings.length > 0) {
    decision = "show_with_notice";
  }

  const result = {
    recommendationId: recommendation.id,
    label: recommendation.label,
    decision,
    path,
    blockers,
    warnings,
    actions:
      decision === "show"
        ? ["show biological graph recommendation"]
        : ["suppress or annotate recommendation until biological boundary review completes"],
  };

  return { ...result, auditDigest: digest(result) };
}

/** Count the decision records whose `decision` equals `decision`. */
function countDecisions(items, decision) {
  return items.filter((item) => item.decision === decision).length;
}

/**
 * Review a whole graph packet.
 *
 * @param {object} graph - `{ graphId, generatedAt, entities, evidence, edges,
 *   recommendations }`; missing lists are treated as empty.
 * @returns {object} Packet with `summary`, `edgeDecisions`,
 *   `recommendationDecisions`, a `jsonLd` rendering and an `auditDigest`
 *   computed over the packet without `jsonLd`.
 * @throws {TypeError} When `graph` is not an object.
 */
function inspectOrganismBoundaryGraph(graph) {
  if (graph === null || typeof graph !== "object") {
    throw new TypeError("inspectOrganismBoundaryGraph requires a graph object");
  }

  const context = {
    entities: buildLookup(graph.entities),
    evidence: buildLookup(graph.evidence),
  };
  const edgeDecisions = normalizeList(graph.edges).map((edge) => evaluateEdge(edge, context));
  const edgeLookup = new Map(edgeDecisions.map((item) => [item.edgeId, item]));
  const recommendationDecisions = normalizeList(graph.recommendations).map((recommendation) =>
    evaluateRecommendation(recommendation, edgeLookup)
  );

  const summary = {
    graphId: graph.graphId,
    generatedAt: graph.generatedAt,
    entityCount: normalizeList(graph.entities).length,
    edgeCount: edgeDecisions.length,
    edgeAllow: countDecisions(edgeDecisions, "allow"),
    edgeReview: countDecisions(edgeDecisions, "curator_review"),
    edgeSuppress: countDecisions(edgeDecisions, "suppress"),
    recommendationCount: recommendationDecisions.length,
    recommendationShow: countDecisions(recommendationDecisions, "show"),
    recommendationNotice: countDecisions(recommendationDecisions, "show_with_notice"),
    recommendationSuppress: countDecisions(recommendationDecisions, "suppress"),
  };

  const packet = {
    generatedAt: graph.generatedAt,
    graphId: graph.graphId,
    summary,
    edgeDecisions,
    recommendationDecisions,
  };

  return {
    ...packet,
    jsonLd: renderJsonLd(packet),
    auditDigest: digest(packet),
  };
}

/** Reduce a decision record to the fields exported in the JSON-LD packet. */
function toJsonLdDecision(type, idKey, item) {
  return {
    "@type": type,
    [idKey]: item[idKey],
    decision: item.decision,
    blockers: item.blockers.map((entry) => entry.code),
    warnings: item.warnings.map((entry) => entry.code),
    auditDigest: item.auditDigest,
  };
}

/**
 * Render a review packet as a JSON-LD object (reason codes only).
 *
 * @param {object} report - Packet with `graphId`, `generatedAt`, `summary`,
 *   `edgeDecisions` and `recommendationDecisions`.
 * @returns {object} JSON-LD document.
 */
function renderJsonLd(report) {
  return {
    "@context": {
      scibase: "https://scibase.ai/schema#",
      taxon: "https://www.ncbi.nlm.nih.gov/taxonomy/",
      decision: "scibase:decision",
      blocker: "scibase:blocker",
      warning: "scibase:warning",
    },
    "@type": "scibase:OrganismBoundaryReviewPacket",
    "@id": `scibase:organism-boundary:${report.graphId}`,
    graphId: report.graphId,
    generatedAt: report.generatedAt,
    summary: report.summary,
    edgeDecisions: report.edgeDecisions.map((item) =>
      toJsonLdDecision("scibase:OrganismBoundaryEdgeDecision", "edgeId", item)
    ),
    recommendationDecisions: report.recommendationDecisions.map((item) =>
      toJsonLdDecision("scibase:OrganismBoundaryRecommendationDecision", "recommendationId", item)
    ),
  };
}

/** Join reason codes for display, or "none" when the list is empty. */
function codeList(entries) {
  return entries.map((entry) => entry.code).join(", ") || "none";
}

/**
 * Render the full report as Markdown.
 *
 * @param {object} report - Result of `inspectOrganismBoundaryGraph`.
 * @returns {string} Markdown text ending with exactly one newline.
 */
function renderMarkdownReport(report) {
  const { summary } = report;
  const lines = [
    "# Organism Strain Boundary Report",
    "",
    `Generated: ${report.generatedAt}`,
    `Graph: ${report.graphId}`,
    `Audit digest: ${report.auditDigest}`,
    "",
    "## Summary",
    "",
    `- Entities: ${summary.entityCount}`,
    `- Edges: ${summary.edgeCount}`,
    `- Edge allow: ${summary.edgeAllow}`,
    `- Edge curator review: ${summary.edgeReview}`,
    `- Edge suppress: ${summary.edgeSuppress}`,
    `- Recommendations: ${summary.recommendationCount}`,
    `- Recommendation show: ${summary.recommendationShow}`,
    `- Recommendation show with notice: ${summary.recommendationNotice}`,
    `- Recommendation suppress: ${summary.recommendationSuppress}`,
    "",
    "## Edge Decisions",
    "",
  ];

  for (const item of report.edgeDecisions) {
    lines.push(
      `### ${item.edgeId}`,
      "",
      `- Type: ${item.type}`,
      `- Decision: ${item.decision}`,
      `- Blockers: ${codeList(item.blockers)}`,
      `- Warnings: ${codeList(item.warnings)}`,
      `- Actions: ${item.actions.join("; ")}`,
      `- Digest: ${item.auditDigest}`,
      ""
    );
  }

  lines.push("## Recommendation Decisions", "");

  for (const item of report.recommendationDecisions) {
    const pathText = item.path.map((entry) => `${entry.edgeId}:${entry.decision}`).join(", ");
    lines.push(
      `### ${item.recommendationId}`,
      "",
      `- Label: ${item.label}`,
      `- Decision: ${item.decision}`,
      `- Path: ${pathText || "none"}`,
      `- Blockers: ${codeList(item.blockers)}`,
      `- Warnings: ${codeList(item.warnings)}`,
      `- Digest: ${item.auditDigest}`,
      ""
    );
  }

  // Drop trailing blank lines so the output ends with a single newline.
  while (lines[lines.length - 1] === "") {
    lines.pop();
  }

  return `${lines.join("\n")}\n`;
}

/**
 * Escape a value for use in XML text or attribute content.
 * `&` is replaced first so entities produced by later steps are not re-escaped.
 *
 * @param {*} value - Any value; stringified with `String()`.
 * @returns {string} Escaped text.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

/**
 * Render a 1280x720 SVG summary: six metric cards in a 3x2 grid, up to three
 * non-allowed edges as a curator queue, and the first 32 digest characters.
 *
 * @param {object} report - Result of `inspectOrganismBoundaryGraph`.
 * @returns {string} SVG document without a trailing newline.
 */
function renderSvgReport(report) {
  const cards = [
    ["Entities", report.summary.entityCount, "#355c7d"],
    ["Edges allowed", report.summary.edgeAllow, "#2a9d8f"],
    ["Edges reviewed", report.summary.edgeReview, "#e9c46a"],
    ["Edges suppressed", report.summary.edgeSuppress, "#e76f51"],
    ["Recommendations shown", report.summary.recommendationShow, "#457b9d"],
    ["Recommendations blocked", report.summary.recommendationSuppress, "#b56576"],
  ];

  const cardSvg = cards
    .map(([label, value, color], index) => {
      const x = 72 + (index % 3) * 380;
      const y = 188 + Math.floor(index / 3) * 150;
      return [
        `<rect x="${x}" y="${y}" width="340" height="118" rx="12" fill="#ffffff" stroke="#d9e2ec"/>`,
        `<rect x="${x}" y="${y}" width="12" height="118" rx="6" fill="${color}"/>`,
        `<text x="${x + 32}" y="${y + 44}" font-size="22" fill="#334e68">${escapeXml(label)}</text>`,
        `<text x="${x + 32}" y="${y + 94}" font-size="40" font-weight="700" fill="#102a43">${escapeXml(value)}</text>`,
      ].join("\n");
    })
    .join("\n");

  const curatorRows = report.edgeDecisions
    .filter((item) => item.decision !== "allow")
    .slice(0, 3)
    .map((item, index) => {
      const y = 545 + index * 32;
      const codes = [...item.blockers, ...item.warnings].map((entry) => entry.code).join(", ");
      const rowText = `${escapeXml(item.edgeId)} - ${escapeXml(item.decision)} - ${escapeXml(codes)}`;
      return `<text x="72" y="${y}" font-size="20" fill="#334e68">${rowText}</text>`;
    })
    .join("\n");

  const digestPrefix = String(report.auditDigest || "").slice(0, 32);

  return `<svg xmlns="http://www.w3.org/2000/svg" width="1280" height="720" viewBox="0 0 1280 720" font-family="Arial, Helvetica, sans-serif">
  <rect width="1280" height="720" fill="#f0f4f8"/>
  <rect x="40" y="40" width="1200" height="640" rx="24" fill="#f7f9fb" stroke="#d9e2ec"/>
  <text x="72" y="104" font-size="40" font-weight="700" fill="#102a43">Organism Strain Boundary Guard</text>
  <text x="72" y="146" font-size="22" fill="#486581">Graph ${escapeXml(report.graphId)} reviewed at ${escapeXml(report.generatedAt)}</text>
  ${cardSvg}
  <text x="72" y="505" font-size="26" font-weight="700" fill="#102a43">Curator queue</text>
  ${curatorRows}
  <text x="72" y="660" font-size="18" fill="#627d98">Audit digest ${escapeXml(digestPrefix)}</text>
</svg>`;
}

/**
 * SHA-256 hex digest of the JSON serialization of `value`.
 * The result depends on property insertion order, so callers build their
 * objects with a fixed key order.
 *
 * @param {*} value - JSON-serializable value.
 * @returns {string} 64-character lowercase hex digest.
 */
function digest(value) {
  return crypto.createHash("sha256").update(JSON.stringify(value)).digest("hex");
}
--git a/organism-strain-boundary-guard/reports/demo.mp4 b/organism-strain-boundary-guard/reports/demo.mp4 new file mode 100644 index 00000000..f18b45f7 Binary files /dev/null and b/organism-strain-boundary-guard/reports/demo.mp4 differ diff --git a/organism-strain-boundary-guard/reports/organism-boundary-review.json b/organism-strain-boundary-guard/reports/organism-boundary-review.json new file mode 100644 index 00000000..a99d7b3a --- /dev/null +++ b/organism-strain-boundary-guard/reports/organism-boundary-review.json @@ -0,0 +1,315 @@ +{ + "generatedAt": "2026-05-31T00:00:00Z", + "graphId": "scibase-organism-boundary-demo", + "summary": { + "graphId": "scibase-organism-boundary-demo", + "generatedAt": "2026-05-31T00:00:00Z", + "entityCount": 8, + "edgeCount": 5, + "edgeAllow": 2, + "edgeReview": 1, + "edgeSuppress": 2, + "recommendationCount": 3, + "recommendationShow": 1, + "recommendationNotice": 1, + "recommendationSuppress": 1 + }, + "edgeDecisions": [ + { + "edgeId": "edge:human-sample-organism", + "type": "sample_of_organism", + "source": "sample:human-lung-01", + "target": "organism:homo-sapiens", + "decision": "allow", + "blockers": [], + "warnings": [], + "actions": [ + "publish biological graph edge", + "allow recommendation paths to use this edge" + ], + "auditDigest": "77db95f47588df7a4e7e2e9fefe7fdbea2acc70135e194e000896f89a70e8b47" + }, + { + "edgeId": "edge:balbc-strain-human-parent", + "type": "strain_of_species", + "source": "strain:balb-c", + "target": "organism:homo-sapiens", + "decision": "suppress", + "blockers": [ + { + "code": "strain_parent_taxon_mismatch", + "message": "strain parent taxon does not match organism node", + "evidence": { + "strainParentTaxonId": "NCBITaxon:10090", + "organismTaxonId": "NCBITaxon:9606" + } + } + ], + "warnings": [], + "actions": [ + "suppress strain edge until parent organism is corrected", + "request strain registry evidence from curator" + ], + "auditDigest": 
"76aaa44f857a62daea2bf7bb7b434f939675143c9667aab968511efd578b4eeb" + }, + { + "edgeId": "edge:pathogen-mouse-host", + "type": "pathogen_in_host", + "source": "pathogen:sars-cov-2", + "target": "organism:mus-musculus", + "decision": "suppress", + "blockers": [ + { + "code": "host_taxon_not_supported", + "message": "host organism is not supported by pathogen host evidence", + "evidence": { + "pathogenId": "pathogen:sars-cov-2", + "targetTaxonId": "NCBITaxon:10090", + "supportedHostTaxonIds": [ + "NCBITaxon:9606", + "NCBITaxon:9544" + ] + } + } + ], + "warnings": [ + { + "code": "host_range_evidence_missing", + "message": "host edge lacks matching host-range evidence", + "evidence": { + "targetTaxonId": "NCBITaxon:10090" + } + } + ], + "actions": [ + "suppress host recommendation path", + "request host range evidence or mark as experimental model" + ], + "auditDigest": "3e512718b0236efb8be0268f51e8d2c83631964b313864940052d6d27a87b885" + }, + { + "edgeId": "edge:hela-human-cell-line", + "type": "cell_line_from_species", + "source": "cell-line:hela", + "target": "organism:homo-sapiens", + "decision": "allow", + "blockers": [], + "warnings": [], + "actions": [ + "publish biological graph edge", + "allow recommendation paths to use this edge" + ], + "auditDigest": "d94653cb2561ea2b2e3e420eed328b8979eb842e96fd5d9c36802e00dea584a0" + }, + { + "edgeId": "edge:mixed-sample-dataset", + "type": "dataset_contains_sample", + "source": "dataset:airway-atlas", + "target": "sample:mixed-airway-02", + "decision": "curator_review", + "blockers": [], + "warnings": [ + { + "code": "mixed_organism_review_missing", + "message": "mixed-organism sample needs contamination or co-culture review", + "evidence": { + "sampleId": "sample:mixed-airway-02", + "observedTaxonIds": [ + "NCBITaxon:9606", + "NCBITaxon:2697049" + ] + } + } + ], + "actions": [ + "show dataset path only with contamination review notice", + "request co-culture or contamination review evidence" + ], + "auditDigest": 
"57c370bba251646dcb718661da8208bc8eb86daef7b6438c28ee0fb09896d6d3" + } + ], + "recommendationDecisions": [ + { + "recommendationId": "rec:human-cell-line-context", + "label": "Show human sample and HeLa context on entity page", + "decision": "show", + "path": [ + { + "edgeId": "edge:human-sample-organism", + "decision": "allow", + "auditDigest": "77db95f47588df7a4e7e2e9fefe7fdbea2acc70135e194e000896f89a70e8b47" + }, + { + "edgeId": "edge:hela-human-cell-line", + "decision": "allow", + "auditDigest": "d94653cb2561ea2b2e3e420eed328b8979eb842e96fd5d9c36802e00dea584a0" + } + ], + "blockers": [], + "warnings": [], + "actions": [ + "show biological graph recommendation" + ], + "auditDigest": "4339fe085adbb40463402134ca1f3e24aba081036f1e4cc20d4f9d597da0ee20" + }, + { + "recommendationId": "rec:bad-strain-parent", + "label": "Recommend BALB/c as human organism evidence", + "decision": "suppress", + "path": [ + { + "edgeId": "edge:balbc-strain-human-parent", + "decision": "suppress", + "auditDigest": "76aaa44f857a62daea2bf7bb7b434f939675143c9667aab968511efd578b4eeb" + } + ], + "blockers": [ + { + "code": "path_contains_suppressed_edge", + "message": "recommendation uses a suppressed biological edge", + "evidence": { + "edgeId": "edge:balbc-strain-human-parent" + } + } + ], + "warnings": [], + "actions": [ + "suppress or annotate recommendation until biological boundary review completes" + ], + "auditDigest": "1f5c8e41562c3ecd0ffbe7c29a8210dd88006942b34bd55db559d84a5940e237" + }, + { + "recommendationId": "rec:mixed-sample-notice", + "label": "Recommend mixed airway culture with contamination notice", + "decision": "show_with_notice", + "path": [ + { + "edgeId": "edge:mixed-sample-dataset", + "decision": "curator_review", + "auditDigest": "57c370bba251646dcb718661da8208bc8eb86daef7b6438c28ee0fb09896d6d3" + } + ], + "blockers": [], + "warnings": [ + { + "code": "path_contains_review_edge", + "message": "recommendation uses an edge requiring curator review", + "evidence": { + 
"edgeId": "edge:mixed-sample-dataset" + } + } + ], + "actions": [ + "suppress or annotate recommendation until biological boundary review completes" + ], + "auditDigest": "53f51b55511ea89a7d932e36fd8e6eeaf5a365e222acf6aed8a994512d68c205" + } + ], + "jsonLd": { + "@context": { + "scibase": "https://scibase.ai/schema#", + "taxon": "https://www.ncbi.nlm.nih.gov/taxonomy/", + "decision": "scibase:decision", + "blocker": "scibase:blocker", + "warning": "scibase:warning" + }, + "@type": "scibase:OrganismBoundaryReviewPacket", + "@id": "scibase:organism-boundary:scibase-organism-boundary-demo", + "graphId": "scibase-organism-boundary-demo", + "generatedAt": "2026-05-31T00:00:00Z", + "summary": { + "graphId": "scibase-organism-boundary-demo", + "generatedAt": "2026-05-31T00:00:00Z", + "entityCount": 8, + "edgeCount": 5, + "edgeAllow": 2, + "edgeReview": 1, + "edgeSuppress": 2, + "recommendationCount": 3, + "recommendationShow": 1, + "recommendationNotice": 1, + "recommendationSuppress": 1 + }, + "edgeDecisions": [ + { + "@type": "scibase:OrganismBoundaryEdgeDecision", + "edgeId": "edge:human-sample-organism", + "decision": "allow", + "blockers": [], + "warnings": [], + "auditDigest": "77db95f47588df7a4e7e2e9fefe7fdbea2acc70135e194e000896f89a70e8b47" + }, + { + "@type": "scibase:OrganismBoundaryEdgeDecision", + "edgeId": "edge:balbc-strain-human-parent", + "decision": "suppress", + "blockers": [ + "strain_parent_taxon_mismatch" + ], + "warnings": [], + "auditDigest": "76aaa44f857a62daea2bf7bb7b434f939675143c9667aab968511efd578b4eeb" + }, + { + "@type": "scibase:OrganismBoundaryEdgeDecision", + "edgeId": "edge:pathogen-mouse-host", + "decision": "suppress", + "blockers": [ + "host_taxon_not_supported" + ], + "warnings": [ + "host_range_evidence_missing" + ], + "auditDigest": "3e512718b0236efb8be0268f51e8d2c83631964b313864940052d6d27a87b885" + }, + { + "@type": "scibase:OrganismBoundaryEdgeDecision", + "edgeId": "edge:hela-human-cell-line", + "decision": "allow", + 
"blockers": [], + "warnings": [], + "auditDigest": "d94653cb2561ea2b2e3e420eed328b8979eb842e96fd5d9c36802e00dea584a0" + }, + { + "@type": "scibase:OrganismBoundaryEdgeDecision", + "edgeId": "edge:mixed-sample-dataset", + "decision": "curator_review", + "blockers": [], + "warnings": [ + "mixed_organism_review_missing" + ], + "auditDigest": "57c370bba251646dcb718661da8208bc8eb86daef7b6438c28ee0fb09896d6d3" + } + ], + "recommendationDecisions": [ + { + "@type": "scibase:OrganismBoundaryRecommendationDecision", + "recommendationId": "rec:human-cell-line-context", + "decision": "show", + "blockers": [], + "warnings": [], + "auditDigest": "4339fe085adbb40463402134ca1f3e24aba081036f1e4cc20d4f9d597da0ee20" + }, + { + "@type": "scibase:OrganismBoundaryRecommendationDecision", + "recommendationId": "rec:bad-strain-parent", + "decision": "suppress", + "blockers": [ + "path_contains_suppressed_edge" + ], + "warnings": [], + "auditDigest": "1f5c8e41562c3ecd0ffbe7c29a8210dd88006942b34bd55db559d84a5940e237" + }, + { + "@type": "scibase:OrganismBoundaryRecommendationDecision", + "recommendationId": "rec:mixed-sample-notice", + "decision": "show_with_notice", + "blockers": [], + "warnings": [ + "path_contains_review_edge" + ], + "auditDigest": "53f51b55511ea89a7d932e36fd8e6eeaf5a365e222acf6aed8a994512d68c205" + } + ] + }, + "auditDigest": "56e2969b5599c79308f6540e2f8ca092c1dda3010de70618a953c4a462fe5a12" +} diff --git a/organism-strain-boundary-guard/reports/organism-boundary-review.md b/organism-strain-boundary-guard/reports/organism-boundary-review.md new file mode 100644 index 00000000..1d102c1a --- /dev/null +++ b/organism-strain-boundary-guard/reports/organism-boundary-review.md @@ -0,0 +1,93 @@ +# Organism Strain Boundary Report + +Generated: 2026-05-31T00:00:00Z +Graph: scibase-organism-boundary-demo +Audit digest: 56e2969b5599c79308f6540e2f8ca092c1dda3010de70618a953c4a462fe5a12 + +## Summary + +- Entities: 8 +- Edges: 5 +- Edge allow: 2 +- Edge curator review: 1 +- Edge 
suppress: 2 +- Recommendations: 3 +- Recommendation show: 1 +- Recommendation show with notice: 1 +- Recommendation suppress: 1 + +## Edge Decisions + +### edge:human-sample-organism + +- Type: sample_of_organism +- Decision: allow +- Blockers: none +- Warnings: none +- Actions: publish biological graph edge; allow recommendation paths to use this edge +- Digest: 77db95f47588df7a4e7e2e9fefe7fdbea2acc70135e194e000896f89a70e8b47 + +### edge:balbc-strain-human-parent + +- Type: strain_of_species +- Decision: suppress +- Blockers: strain_parent_taxon_mismatch +- Warnings: none +- Actions: suppress strain edge until parent organism is corrected; request strain registry evidence from curator +- Digest: 76aaa44f857a62daea2bf7bb7b434f939675143c9667aab968511efd578b4eeb + +### edge:pathogen-mouse-host + +- Type: pathogen_in_host +- Decision: suppress +- Blockers: host_taxon_not_supported +- Warnings: host_range_evidence_missing +- Actions: suppress host recommendation path; request host range evidence or mark as experimental model +- Digest: 3e512718b0236efb8be0268f51e8d2c83631964b313864940052d6d27a87b885 + +### edge:hela-human-cell-line + +- Type: cell_line_from_species +- Decision: allow +- Blockers: none +- Warnings: none +- Actions: publish biological graph edge; allow recommendation paths to use this edge +- Digest: d94653cb2561ea2b2e3e420eed328b8979eb842e96fd5d9c36802e00dea584a0 + +### edge:mixed-sample-dataset + +- Type: dataset_contains_sample +- Decision: curator_review +- Blockers: none +- Warnings: mixed_organism_review_missing +- Actions: show dataset path only with contamination review notice; request co-culture or contamination review evidence +- Digest: 57c370bba251646dcb718661da8208bc8eb86daef7b6438c28ee0fb09896d6d3 + +## Recommendation Decisions + +### rec:human-cell-line-context + +- Label: Show human sample and HeLa context on entity page +- Decision: show +- Path: edge:human-sample-organism:allow, edge:hela-human-cell-line:allow +- Blockers: none +- 
Warnings: none +- Digest: 4339fe085adbb40463402134ca1f3e24aba081036f1e4cc20d4f9d597da0ee20 + +### rec:bad-strain-parent + +- Label: Recommend BALB/c as human organism evidence +- Decision: suppress +- Path: edge:balbc-strain-human-parent:suppress +- Blockers: path_contains_suppressed_edge +- Warnings: none +- Digest: 1f5c8e41562c3ecd0ffbe7c29a8210dd88006942b34bd55db559d84a5940e237 + +### rec:mixed-sample-notice + +- Label: Recommend mixed airway culture with contamination notice +- Decision: show_with_notice +- Path: edge:mixed-sample-dataset:curator_review +- Blockers: none +- Warnings: path_contains_review_edge +- Digest: 53f51b55511ea89a7d932e36fd8e6eeaf5a365e222acf6aed8a994512d68c205 diff --git a/organism-strain-boundary-guard/reports/organism-boundary-review.svg b/organism-strain-boundary-guard/reports/organism-boundary-review.svg new file mode 100644 index 00000000..3579c3d2 --- /dev/null +++ b/organism-strain-boundary-guard/reports/organism-boundary-review.svg @@ -0,0 +1,35 @@ + + + + Organism Strain Boundary Guard + Graph scibase-organism-boundary-demo reviewed at 2026-05-31T00:00:00Z + + +Entities +8 + + +Edges allowed +2 + + +Edges reviewed +1 + + +Edges suppressed +2 + + +Recommendations shown +1 + + +Recommendations blocked +1 + Curator queue + edge:balbc-strain-human-parent - suppress - strain_parent_taxon_mismatch +edge:pathogen-mouse-host - suppress - host_taxon_not_supported, host_range_evidence_missing +edge:mixed-sample-dataset - curator_review - mixed_organism_review_missing + Audit digest 56e2969b5599c79308f6540e2f8ca092 + \ No newline at end of file diff --git a/organism-strain-boundary-guard/sample-data.js b/organism-strain-boundary-guard/sample-data.js new file mode 100644 index 00000000..7ca0552f --- /dev/null +++ b/organism-strain-boundary-guard/sample-data.js @@ -0,0 +1,153 @@ +const graph = { + graphId: "scibase-organism-boundary-demo", + generatedAt: "2026-05-31T00:00:00Z", + entities: [ + { + id: "organism:homo-sapiens", + type: 
"organism", + label: "Homo sapiens", + taxonId: "NCBITaxon:9606", + }, + { + id: "organism:mus-musculus", + type: "organism", + label: "Mus musculus", + taxonId: "NCBITaxon:10090", + }, + { + id: "strain:balb-c", + type: "strain", + label: "BALB/c mouse", + taxonId: "NCBITaxon:10090-BALBC", + parentTaxonId: "NCBITaxon:10090", + aliasEvidenceIds: ["ev:jax-strain-registry"], + }, + { + id: "pathogen:sars-cov-2", + type: "pathogen", + label: "SARS-CoV-2 isolate SCB-31", + taxonId: "NCBITaxon:2697049", + supportedHostTaxonIds: ["NCBITaxon:9606", "NCBITaxon:9544"], + }, + { + id: "cell-line:hela", + type: "cell_line", + label: "HeLa", + speciesTaxonId: "NCBITaxon:9606", + cultureCollectionId: "ATCC:CCL-2", + }, + { + id: "sample:human-lung-01", + type: "sample", + label: "Human lung biopsy 01", + taxonId: "NCBITaxon:9606", + voucherId: "SCB-VCH-001", + observedTaxonIds: ["NCBITaxon:9606"], + }, + { + id: "sample:mixed-airway-02", + type: "sample", + label: "Mixed airway culture 02", + taxonId: "NCBITaxon:9606", + voucherId: "SCB-VCH-044", + observedTaxonIds: ["NCBITaxon:9606", "NCBITaxon:2697049"], + contaminationReview: false, + }, + { + id: "dataset:airway-atlas", + type: "dataset", + label: "Airway infection atlas", + doi: "10.5555/scibase.airway", + }, + ], + evidence: [ + { + id: "ev:human-voucher", + type: "voucher", + label: "Human specimen voucher SCB-VCH-001", + taxonId: "NCBITaxon:9606", + }, + { + id: "ev:jax-strain-registry", + type: "strain_registry", + label: "JAX BALB/c strain registry", + taxonId: "NCBITaxon:10090-BALBC", + parentTaxonId: "NCBITaxon:10090", + }, + { + id: "ev:host-range-review", + type: "host_range", + label: "Curated SARS-CoV-2 host range", + supportedHostTaxonIds: ["NCBITaxon:9606", "NCBITaxon:9544"], + }, + { + id: "ev:hela-atcc", + type: "cell_line_registry", + label: "ATCC HeLa registry", + speciesTaxonId: "NCBITaxon:9606", + cultureCollectionId: "ATCC:CCL-2", + }, + { + id: "ev:dataset-doi", + type: "dataset_doi", + label: "Dataset 
// Smoke test for the organism/strain boundary guard. It runs the guard once
// over the bundled sample graph and pins the summary counts, the per-edge
// decisions and the per-recommendation decisions. Run with
// `node organism-strain-boundary-guard/test.js`; any failed assertion throws
// and the final log line is never printed.
const assert = require("assert");
const graph = require("./sample-data");
const { inspectOrganismBoundaryGraph } = require("./index");

const report = inspectOrganismBoundaryGraph(graph);

/**
 * Looks up one decision by id and fails loudly when it is absent.
 *
 * Without the presence check a missing decision would surface later as
 * "Cannot read properties of undefined", which hides which id was not found.
 *
 * @param {Array<object>} decisions - decision list taken from the report
 * @param {string} idField - property holding the id ("edgeId" or "recommendationId")
 * @param {string} id - identifier to look for
 * @returns {object} the matching decision
 * @throws {assert.AssertionError} when no decision carries that id
 */
function findDecision(decisions, idField, id) {
  const match = decisions.find((item) => item[idField] === id);
  assert.ok(match, `report has no decision with ${idField} "${id}"`);
  return match;
}

/**
 * @param {string} id - edge id from the sample graph
 * @returns {object} the edge decision reported for that edge
 */
function edge(id) {
  return findDecision(report.edgeDecisions, "edgeId", id);
}

/**
 * @param {string} id - recommendation id from the sample graph
 * @returns {object} the decision reported for that recommendation
 */
function recommendation(id) {
  return findDecision(report.recommendationDecisions, "recommendationId", id);
}

/**
 * @param {Array<{code: string}>} findings - blockers or warnings of a decision
 * @param {string} code - finding code to look for
 * @returns {boolean} true when any finding carries exactly that code
 */
function hasCode(findings, code) {
  return findings.some((item) => item.code === code);
}

// Summary counters for the five sample edges and three recommendations.
assert.strictEqual(report.summary.edgeCount, 5);
assert.strictEqual(report.summary.edgeAllow, 2);
assert.strictEqual(report.summary.edgeReview, 1);
assert.strictEqual(report.summary.edgeSuppress, 2);
assert.strictEqual(report.summary.recommendationCount, 3);
assert.strictEqual(report.summary.recommendationShow, 1);
assert.strictEqual(report.summary.recommendationNotice, 1);
assert.strictEqual(report.summary.recommendationSuppress, 1);

// Edges expected to pass untouched.
assert.strictEqual(edge("edge:human-sample-organism").decision, "allow");
assert.strictEqual(edge("edge:hela-human-cell-line").decision, "allow");

// Strain edge that targets the wrong parent organism.
const strainEdge = edge("edge:balbc-strain-human-parent");
assert.strictEqual(strainEdge.decision, "suppress");
assert(
  hasCode(strainEdge.blockers, "strain_parent_taxon_mismatch"),
  "BALB/c strain must not map to human parent organism"
);

// Pathogen edge whose host taxon is outside the supported host list.
const hostEdge = edge("edge:pathogen-mouse-host");
assert.strictEqual(hostEdge.decision, "suppress");
assert(
  hasCode(hostEdge.blockers, "host_taxon_not_supported"),
  "unsupported pathogen host taxon must suppress the edge"
);

// Dataset edge onto a mixed-organism sample that has no contamination review.
const mixedEdge = edge("edge:mixed-sample-dataset");
assert.strictEqual(mixedEdge.decision, "curator_review");
assert(
  hasCode(mixedEdge.warnings, "mixed_organism_review_missing"),
  "mixed-organism sample should require contamination or co-culture review"
);

// Recommendation decisions follow from the edges on each path.
assert.strictEqual(recommendation("rec:human-cell-line-context").decision, "show");

const blockedRecommendation = recommendation("rec:bad-strain-parent");
assert.strictEqual(blockedRecommendation.decision, "suppress");
assert(
  hasCode(blockedRecommendation.blockers, "path_contains_suppressed_edge"),
  "recommendations using suppressed biological edges should be blocked"
);

const noticeRecommendation = recommendation("rec:mixed-sample-notice");
assert.strictEqual(noticeRecommendation.decision, "show_with_notice");
assert(
  hasCode(noticeRecommendation.warnings, "path_contains_review_edge"),
  "review-only biological edges should surface recommendation notices"
);

// Packet-level checks: JSON-LD type and a 64-character lowercase hex digest.
assert.strictEqual(report.jsonLd["@type"], "scibase:OrganismBoundaryReviewPacket");
assert.match(report.auditDigest, /^[a-f0-9]{64}$/);

console.log("organism strain boundary guard tests passed");