Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions schemas/security_report_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def diagnose_security_report_example(
if isinstance(payload, dict):
warnings.extend(payload.get("security_warnings", []))
warnings.extend(payload.get("parser_warnings", []))
refs = document_reference_index(payload)

findings = report.get("findings") if isinstance(report, dict) else []
if not isinstance(findings, list):
Expand Down Expand Up @@ -95,6 +96,8 @@ def diagnose_security_report_example(
f"{ctx}: summary.{code} must be {expected_count} for report findings"
)

diagnose_findings_references(findings, refs, ctx, diagnostics)

inventories = report.get("inventories") if isinstance(report, dict) else {}
if not isinstance(inventories, dict):
diagnostics.append(f"{ctx}: inventories must be an object")
Expand Down Expand Up @@ -128,6 +131,8 @@ def diagnose_security_report_example(
f"{ctx}: external_links_present finding requires inventories.links external=true entry"
)

diagnose_inventory_references(inventory_lists, refs, ctx, diagnostics)

return diagnostics


Expand Down Expand Up @@ -161,3 +166,111 @@ def inventory_items(inventories, name, ctx, diagnostics):
diagnostics.append(f"{ctx}: inventories.{name} must be an array")
return []
return items


def document_reference_index(payload):
    """Build id-keyed lookup tables for a document payload.

    Returns a dict with the keys "pages", "elements" and "spans". For a
    dict payload each value is keyed_objects() applied to the payload list
    of the same name (a missing list counts as empty). Any non-dict payload
    yields three empty tables.
    """
    kinds = ("pages", "elements", "spans")
    if isinstance(payload, dict):
        return {kind: keyed_objects(payload.get(kind, [])) for kind in kinds}
    return {kind: {} for kind in kinds}


def keyed_objects(items):
    """Index a list of objects by their string "id".

    Entries that are not dicts, or whose "id" is missing or not a string,
    are left out. When several entries share an id, the last one wins.
    Anything other than a list produces an empty mapping.
    """
    by_id = {}
    if not isinstance(items, list):
        return by_id
    for entry in items:
        if not isinstance(entry, dict):
            continue
        key = entry.get("id")
        if isinstance(key, str):
            by_id[key] = entry
    return by_id


def diagnose_findings_references(findings, refs, ctx, diagnostics):
    """Check each finding's document references and append any problems found.

    Entries of ``findings`` that are not dicts are skipped. For every dict
    finding:

    * a ``page`` that is not None is checked with check_page_ref();
    * ``element_ref`` and ``span_ref`` are checked with check_locator_ref()
      against the "elements" and "spans" tables of ``refs``;
    * when the ``bbox`` key is present, check_bbox() is called with the
      finding's page, even when that page is None.

    Messages are appended to ``diagnostics``; nothing is returned.
    """
    locator_kinds = (("element_ref", "elements"), ("span_ref", "spans"))
    for position, entry in enumerate(findings):
        if isinstance(entry, dict):
            label = finding_ctx(entry, position)
            page_ref = entry.get("page")
            if page_ref is not None:
                check_page_ref(page_ref, refs, ctx, label, diagnostics)
            for key, kind in locator_kinds:
                check_locator_ref(entry, key, kind, refs, ctx, label, diagnostics)
            if "bbox" in entry:
                check_bbox(
                    entry.get("bbox"), page_ref, refs, ctx, label, diagnostics
                )


def diagnose_inventory_references(inventory_lists, refs, ctx, diagnostics):
    """Check page and bbox references of every inventory entry.

    ``inventory_lists`` maps an inventory name to its list of entries.
    Entries that are not dicts are skipped. For each dict entry, a ``page``
    that is not None is checked with check_page_ref(), and a present
    ``bbox`` key is checked with check_bbox() using that page (which may be
    None). Each entry is labelled ``inventories.<name>[<index>]`` in the
    messages appended to ``diagnostics``.
    """
    for inventory_name, entries in inventory_lists.items():
        for position, entry in enumerate(entries):
            if isinstance(entry, dict):
                label = f"inventories.{inventory_name}[{position}]"
                page_ref = entry.get("page")
                if page_ref is not None:
                    check_page_ref(page_ref, refs, ctx, label, diagnostics)
                if "bbox" in entry:
                    check_bbox(
                        entry.get("bbox"), page_ref, refs, ctx, label, diagnostics
                    )


def check_locator_ref(item, key, ref_kind, refs, ctx, item_ctx, diagnostics):
    """Validate one locator reference (``item[key]``) against the document.

    Args:
        item: Dict holding the reference and, optionally, a "page".
        key: Name of the reference field, used in lookups and messages.
        ref_kind: Which table of ``refs`` to resolve against.
        refs: Mapping of table name to an id-keyed dict of document objects.
        ctx: Prefix for every diagnostic message.
        item_ctx: Label of the item being checked.
        diagnostics: List that messages are appended to.

    A missing or None reference is accepted silently. A reference that does
    not resolve is reported as unknown. When both the item and the resolved
    target carry a page and the two differ, a mismatch is reported.
    """
    ref = item.get(key)
    if ref is None:
        return
    # A non-string value is reported as unknown without touching the table.
    # This keeps unhashable JSON values (lists, objects) from raising
    # TypeError in the dict lookup. It assumes table keys are strings, as
    # keyed_objects() produces them.
    target = refs[ref_kind].get(ref) if isinstance(ref, str) else None
    if target is None:
        diagnostics.append(f"{ctx}: {item_ctx} references unknown {key} {ref}")
        return
    page = item.get("page")
    target_page = target.get("page") if isinstance(target, dict) else None
    # Only compare when both sides declare a page; a missing page on either
    # side is not treated as a mismatch.
    if page is not None and target_page is not None and page != target_page:
        diagnostics.append(
            f"{ctx}: {item_ctx} {key} {ref} page {target_page} does not match page {page}"
        )


def check_page_ref(page, refs, ctx, item_ctx, diagnostics):
    """Resolve a page reference, reporting it when it is unknown.

    Args:
        page: The page reference taken from a finding or inventory entry.
        refs: Mapping whose "pages" table is an id-keyed dict of page objects.
        ctx: Prefix for the diagnostic message.
        item_ctx: Label of the item being checked.
        diagnostics: List that the message is appended to.

    Returns:
        The page object from ``refs["pages"]``, or None when the reference
        does not resolve (in which case a diagnostic was appended).
    """
    pages = refs["pages"]
    # Non-string references are reported as unknown instead of being looked
    # up: an unhashable JSON value (list, object) would otherwise raise
    # TypeError on the membership test. This assumes table keys are strings,
    # as keyed_objects() produces them.
    if not isinstance(page, str) or page not in pages:
        diagnostics.append(f"{ctx}: {item_ctx} references unknown page {page}")
        return None
    return pages[page]


def check_bbox(bbox, page, refs, ctx, item_ctx, diagnostics):
    """Validate a bounding box against the page it is declared on.

    Args:
        bbox: The value to validate, expected to be ``[x0, y0, x1, y1]``.
        page: Page reference of the owning item, or None.
        refs: Mapping whose "pages" table is an id-keyed dict of page objects.
        ctx: Prefix for every diagnostic message.
        item_ctx: Label of the item being checked.
        diagnostics: List that messages are appended to.

    At most one diagnostic is appended per call. Checks run in this order:
    a page must be given; the bbox must be a list of four integers; the box
    must have positive width and height; the box must lie inside the page.
    When the page does not resolve, nothing is reported here (reporting an
    unknown page is left to the page-reference check).
    """
    if page is None:
        diagnostics.append(f"{ctx}: {item_ctx} bbox requires page")
        return
    # Only string references can resolve; guarding the lookup keeps an
    # unhashable page value (list, object) from raising TypeError.
    page_obj = refs["pages"].get(page) if isinstance(page, str) else None
    if page_obj is None:
        return
    if (
        not isinstance(bbox, list)
        or len(bbox) != 4
        # bool is a subclass of int, so True/False must be rejected explicitly.
        or any(isinstance(coord, bool) or not isinstance(coord, int) for coord in bbox)
    ):
        diagnostics.append(f"{ctx}: {item_ctx} bbox must be four integer coordinates")
        return
    x0, y0, x1, y1 = bbox
    # Inverted boxes (x0 > x1 or y0 > y1) are reported with the same message.
    if x0 >= x1 or y0 >= y1:
        diagnostics.append(f"{ctx}: {item_ctx} bbox has zero area")
        return

    def extent(name):
        # A missing dimension counts as 0. A non-numeric one is treated the
        # same way so the comparison below cannot raise TypeError.
        value = page_obj.get(name, 0)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    if x0 < 0 or y0 < 0 or x1 > extent("width") or y1 > extent("height"):
        diagnostics.append(f"{ctx}: {item_ctx} bbox exceeds page {page} bounds")


def finding_ctx(finding, index):
    """Return the label used for a finding in diagnostic messages.

    A finding with a string "id" is labelled ``finding <id>``; any other
    finding falls back to its position, ``findings[<index>]``.
    """
    ident = finding.get("id")
    return f"finding {ident}" if isinstance(ident, str) else f"findings[{index}]"
136 changes: 136 additions & 0 deletions schemas/test_security_report_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,83 @@ def test_default_excluded_warning_codes_must_be_flagged(self) -> None:
diagnostics,
)

def test_finding_page_refs_must_exist_in_document(self) -> None:
    """Pointing the second finding at page "p9999" yields an unknown-page diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["findings"][1]["page"] = "p9999"

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: finding f0002 references unknown page p9999"
    self.assertIn(expected, produced)

def test_finding_element_refs_must_exist_in_document(self) -> None:
    """Setting the second finding's element_ref to "e999999" yields an unknown-ref diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["findings"][1]["element_ref"] = "e999999"

    produced = diagnose_security_report_example(self.document, mutated)

    expected = (
        "security-report.example.json: finding f0002 references unknown element_ref e999999"
    )
    self.assertIn(expected, produced)

def test_finding_span_refs_must_match_finding_page(self) -> None:
    """Moving the first finding to an added page "p0002" yields a span page-mismatch diagnostic."""
    extra_page = {
        "id": "p0002",
        "index": 2,
        "width": 61200,
        "height": 79200,
        "rotation": 0,
    }
    doc_copy = copy.deepcopy(self.document)
    doc_copy["payload"]["pages"].append(extra_page)
    mutated = copy.deepcopy(self.report)
    mutated["findings"][0]["page"] = "p0002"

    produced = diagnose_security_report_example(doc_copy, mutated)

    expected = (
        "security-report.example.json: finding f0001 span_ref s000003 page p0001 "
        "does not match page p0002"
    )
    self.assertIn(expected, produced)

def test_finding_bbox_must_have_page(self) -> None:
    """Removing "page" from the first finding yields a bbox-requires-page diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["findings"][0].pop("page")

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: finding f0001 bbox requires page"
    self.assertIn(expected, produced)

def test_finding_bbox_must_have_positive_area(self) -> None:
    """Collapsing the first finding's bbox to zero width yields a zero-area diagnostic."""
    mutated = copy.deepcopy(self.report)
    box = mutated["findings"][0]["bbox"]
    box[2] = box[0]

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: finding f0001 bbox has zero area"
    self.assertIn(expected, produced)

def test_finding_bbox_must_stay_inside_page_bounds(self) -> None:
    """Setting the first finding's bbox x1 to 61201 yields an exceeds-bounds diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["findings"][0]["bbox"][2] = 61201

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: finding f0001 bbox exceeds page p0001 bounds"
    self.assertIn(expected, produced)

def test_annotations_inventory_requires_matching_finding(self) -> None:
report = copy.deepcopy(self.report)
report["findings"] = [
Expand Down Expand Up @@ -143,6 +220,65 @@ def test_annotations_finding_requires_inventory_entry(self) -> None:
diagnostics,
)

def test_inventory_page_refs_must_exist_in_document(self) -> None:
    """Pointing the first annotations entry at page "p9999" yields an unknown-page diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["inventories"]["annotations"][0]["page"] = "p9999"

    produced = diagnose_security_report_example(self.document, mutated)

    expected = (
        "security-report.example.json: inventories.annotations[0] "
        "references unknown page p9999"
    )
    self.assertIn(expected, produced)

def test_inventory_bbox_must_have_page(self) -> None:
    """Removing "page" from the first links entry yields a bbox-requires-page diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["inventories"]["links"][0].pop("page")

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: inventories.links[0] bbox requires page"
    self.assertIn(expected, produced)

def test_inventory_bbox_must_have_positive_area(self) -> None:
    """Collapsing the first links entry's bbox to zero width yields a zero-area diagnostic."""
    mutated = copy.deepcopy(self.report)
    box = mutated["inventories"]["links"][0]["bbox"]
    box[2] = box[0]

    produced = diagnose_security_report_example(self.document, mutated)

    expected = "security-report.example.json: inventories.links[0] bbox has zero area"
    self.assertIn(expected, produced)

def test_inventory_bbox_must_stay_inside_page_bounds(self) -> None:
    """Setting the first annotations entry's bbox y1 to 79201 yields an exceeds-bounds diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["inventories"]["annotations"][0]["bbox"][3] = 79201

    produced = diagnose_security_report_example(self.document, mutated)

    expected = (
        "security-report.example.json: inventories.annotations[0] "
        "bbox exceeds page p0001 bounds"
    )
    self.assertIn(expected, produced)

def test_action_inventory_page_refs_are_checked_without_action_semantics(self) -> None:
    """Pointing the first actions entry at page "p9999" yields an unknown-page diagnostic."""
    mutated = copy.deepcopy(self.report)
    mutated["inventories"]["actions"][0]["page"] = "p9999"

    produced = diagnose_security_report_example(self.document, mutated)

    expected = (
        "security-report.example.json: inventories.actions[0] references unknown page p9999"
    )
    self.assertIn(expected, produced)

def test_external_link_inventory_requires_matching_finding(self) -> None:
report = copy.deepcopy(self.report)
report["findings"] = [
Expand Down
Loading