-
{len(findings)}
+
{len(payload['findings'])}
{severity_counts['CRITICAL']}
{severity_counts['HIGH']}
{len(payload['rows'])}
@@ -976,13 +996,58 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st
)
return output.getvalue()
+     @classmethod
+     def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str:
+         """Extract a stable, deterministic rule ID for SARIF.
+
+         Candidate identifiers are tried in priority order and the first one
+         that is a non-blank string wins:
+
+         1. ``cve`` (top level only)
+         2. ``cwe``
+         3. ``check_id``, ``plugin_rule_id``, ``rule_id``, ``id``
+         4. ``title`` (top level only)
+
+         Keys in groups 2 and 3 are read from the finding itself first and
+         then from ``finding["metadata"]``.
+
+         Args:
+             finding: A single finding mapping.
+
+         Returns:
+             The chosen identifier, lowercased, with each character outside
+             ``[a-zA-Z0-9-]`` replaced by a hyphen, runs of hyphens collapsed
+             and leading/trailing hyphens removed. ``"security-finding"`` is
+             returned when no usable identifier exists.
+         """
+         metadata = finding.get("metadata")
+         if not isinstance(metadata, dict):
+             # "metadata" can be present but None (or malformed); chaining
+             # .get() on it would raise, so treat it as empty instead.
+             metadata = {}
+
+         candidates = [finding.get("cve")]
+         for key in ("cwe", "check_id", "plugin_rule_id", "rule_id", "id"):
+             candidates.append(finding.get(key))
+             candidates.append(metadata.get(key))
+         candidates.append(finding.get("title"))
+
+         raw_rule_id = "security-finding"
+         for value in candidates:
+             # Non-string values (e.g. a numeric title) are skipped so that
+             # re.sub below always receives a str.
+             if isinstance(value, str) and value.strip():
+                 raw_rule_id = value.strip()
+                 break
+
+         rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower()
+         rule_id = re.sub(r"-+", "-", rule_id).strip("-")
+         # A candidate made only of punctuation sanitizes down to "".
+         return rule_id or "security-finding"
+
+     @classmethod
+     def _extract_sarif_locations(cls, finding: Dict[str, Any], default_target: str) -> List[Dict[str, Any]]:
+         """Extract location data for a SARIF finding.
+
+         The finding's own ``target`` is used when set, otherwise
+         ``default_target``. A target containing ``://`` is treated as a URL
+         and used verbatim as the artifact URI. Any other target of the form
+         ``<path>:<digits>`` is split into the artifact URI and a start line.
+
+         NOTE(review): a bare ``host:port`` target (no scheme) is
+         indistinguishable from ``path:line`` here and is split the same way.
+
+         Args:
+             finding: A single finding mapping.
+             default_target: Target to fall back on when the finding has none.
+
+         Returns:
+             A list holding one SARIF location object, or an empty list when
+             no usable (non-empty string) target is available.
+         """
+         target = finding.get("target") or default_target
+         if not target or not isinstance(target, str):
+             return []
+
+         physical_location: Dict[str, Any] = {"artifactLocation": {"uri": target}}
+
+         if "://" not in target:
+             # Split on the last colon only, so drive letters and other
+             # earlier colons stay part of the path.
+             path, _, line_text = target.rpartition(":")
+             # isascii() rejects characters such as "²" that satisfy
+             # isdigit() but make int() raise ValueError.
+             if path and line_text.isascii() and line_text.isdigit():
+                 physical_location["artifactLocation"]["uri"] = path
+                 line_number = int(line_text)
+                 # SARIF line numbers are 1-based; omit the region rather
+                 # than emit an invalid startLine of 0.
+                 if line_number >= 1:
+                     physical_location["region"] = {"startLine": line_number}
+
+         return [{"physicalLocation": physical_location}]
+
@classmethod
def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str:
"""Generate a SARIF v2.1.0 report for GitHub Code Scanning."""
payload = cls._build_report_payload(task, result)
tool_name = payload["tool_name"]
- # Define severity mapping to SARIF levels
severity_map = {
"CRITICAL": "error",
"HIGH": "error",
@@ -996,37 +1061,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
results = []
for finding in payload["findings"]:
- # Derive a stable, deterministic rule ID from finding-specific identifiers
- raw_rule_id = None
-
- # 1. Check CVE
- cve = finding.get("cve")
- if cve and isinstance(cve, str) and cve.strip():
- raw_rule_id = cve.strip()
-
- # 2. Check CWE (direct or in metadata)
- if not raw_rule_id:
- cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe")
- if cwe and isinstance(cwe, str) and cwe.strip():
- raw_rule_id = cwe.strip()
-
- # 3. Check specific check/plugin/finding identifiers
- if not raw_rule_id:
- for key in ["check_id", "plugin_rule_id", "rule_id", "id"]:
- val = finding.get(key) or finding.get("metadata", {}).get(key)
- if val and isinstance(val, str) and val.strip():
- raw_rule_id = val.strip()
- break
-
- # 4. Fallback to sanitized title
- if not raw_rule_id:
- raw_rule_id = finding.get("title") or "security-finding"
-
- # Sanitize raw rule ID (lowercase, replace non-alphanumeric with hyphens)
- rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower()
- rule_id = re.sub(r"-+", "-", rule_id).strip("-")
- if not rule_id:
- rule_id = "security-finding"
+ rule_id = cls._extract_sarif_rule_id(finding)
if rule_id not in rule_indices:
rule_indices[rule_id] = len(rules)
@@ -1054,34 +1089,8 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
"text": finding.get("description", "Security finding detected")
},
"level": severity_map.get(finding["severity"], "note"),
- "locations": []
+ "locations": cls._extract_sarif_locations(finding, payload["target"])
}
-
- # Attempt to extract location if available
- target = finding.get("target") or payload["target"]
- # Check if target looks like a file path or URI
- if target:
- is_url = "://" in target or target.startswith(("http://", "https://"))
-
- location = {
- "physicalLocation": {
- "artifactLocation": {
- "uri": target
- }
- }
- }
-
- # If target has a line number like file.py:123 and is NOT a web URL
- if not is_url and ":" in target:
- parts = target.split(":")
- if parts[-1].isdigit():
- location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1])
- location["physicalLocation"]["region"] = {
- "startLine": int(parts[-1])
- }
-
- sarif_result["locations"].append(location)
-
results.append(sarif_result)
sarif_output = {
diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py
new file mode 100644
index 00000000..4d929877
--- /dev/null
+++ b/testing/backend/unit/test_reporting_parity.py
@@ -0,0 +1,80 @@
+import json
+import pytest
+from backend.secuscan.reporting import reporting
+
+ @pytest.fixture
+ def sample_scan_data():
+     """Return a ``(task, result)`` pair describing one HIGH-severity finding."""
+     task = {
+         "id": "task_abc123",
+         "tool_name": "TestScanner",
+         "target": "https://example.com",
+         "status": "completed",
+         "created_at": "2026-05-31T12:00:00Z",
+         "preset": "default",
+     }
+     result = {
+         "findings": [
+             {
+                 "title": "Reflected Cross-Site Scripting",
+                 "severity": "HIGH",
+                 "category": "Injection",
+                 "description": "User input is reflected without sanitization.",
+                 "remediation": "Escape all user-supplied input.",
+                 # NOTE(review): payload reconstructed from the escaping
+                 # assertion in test_html_web_output_parity; confirm.
+                 "proof": "<script>alert(1)</script>",
+                 "cve": "CVE-2024-0001",
+                 "target": "https://example.com/search",
+             }
+         ],
+         "summary": ["Found 1 high severity issue."],
+         "structured": {"rows": []},
+     }
+     return task, result
+
+
+ def test_sarif_output_parity(sample_scan_data):
+     """Proves SARIF output maintains exact structural parity post-refactor."""
+     task, result = sample_scan_data
+     sarif_str = reporting.generate_sarif_report(task, result)
+     sarif_data = json.loads(sarif_str)
+     run = sarif_data["runs"][0]
+
+     # Assert base schema and tool data parity
+     assert sarif_data["version"] == "2.1.0"
+     assert run["tool"]["driver"]["name"] == "TestScanner"
+
+     # Assert rule extraction parity from the new _extract_sarif_rule_id helper
+     rules = run["tool"]["driver"]["rules"]
+     assert len(rules) == 1
+     assert rules[0]["id"] == "cve-2024-0001"
+     assert rules[0]["name"] == "Reflected Cross-Site Scripting"
+
+     # Assert result mapping and location parity from the new _extract_sarif_locations helper
+     results = run["results"]
+     assert len(results) == 1
+     assert results[0]["ruleId"] == "cve-2024-0001"
+     assert results[0]["level"] == "error"
+     assert results[0]["message"]["text"] == "User input is reflected without sanitization."
+     artifact = results[0]["locations"][0]["physicalLocation"]["artifactLocation"]
+     assert artifact["uri"] == "https://example.com/search"
+
+
+ def test_html_web_output_parity(sample_scan_data):
+     """Proves Web HTML output correctly injects modularized markup from _build_web_finding_markup."""
+     task, result = sample_scan_data
+     html_str = reporting.generate_html_report(task, result)
+
+     # Assert the modularized finding block rendered correctly with all data
+     assert "Reflected Cross-Site Scripting" in html_str
+     assert "severity-high" in html_str
+     assert "User input is reflected without sanitization." in html_str
+     # NOTE(review): expected value is the entity-escaped form of the fixture's
+     # "proof" payload; confirm it matches the escaping the generator applies.
+     assert "&lt;script&gt;alert(1)&lt;/script&gt;" in html_str  # Checks HTML escaping parity
+     assert "Escape all user-supplied input." in html_str
+     assert "CVE-2024-0001" in html_str
+
+def test_html_pdf_output_parity(sample_scan_data):
+ """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup."""
+ task, result = sample_scan_data
+
+ # Test the internal HTML generator for the PDF to verify string parity
+ pdf_html_str = reporting._generate_pdf_html_report(task, result)
+
+ # Assert table-based PDF markup rendered correctly
+ assert "