Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 112 additions & 103 deletions backend/secuscan/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,18 +283,44 @@ def _format_timestamp(value: str) -> str:
continue
return value

@classmethod
def _build_pdf_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str:
    """Render one finding as the table-based HTML fragment used by the PDF report.

    Args:
        finding: Finding record. ``severity``, ``title``, ``category``,
            ``target`` and ``description`` are read with direct indexing and
            must be present; ``proof``, ``remediation`` and ``cve`` are
            optional and their sections are omitted when missing or falsy.
        target: Scan-level target shown when the finding's own target is falsy.
        critical_icon: Value placed in the severity icon's ``src`` attribute.

    Returns:
        The HTML fragment for this finding, starting and ending with a newline.
    """
    evidence_html = f"<h4>Evidence</h4><pre>{cls._escape_html(finding['proof'])}</pre>" if finding.get("proof") else ""
    remediation_html = f"<div class='remediation'><h4>Recommended action</h4><p>{cls._escape_html(finding['remediation'])}</p></div>" if finding.get("remediation") else ""
    cve_html = f"<p class='meta'>CVE: {cls._escape_html(finding['cve'])}</p>" if finding.get("cve") else ""

    severity = finding["severity"]
    # The severity ends up inside a quoted class attribute, so keep only
    # characters that are valid in a CSS class token. The usual severity names
    # pass through unchanged; anything that could close the attribute is dropped.
    severity_class = re.sub(r"[^a-z0-9_-]", "", severity.lower())
    severity_html = cls._escape_html(severity)
    title_html = cls._escape_html(finding["title"])
    category_html = cls._escape_html(finding["category"])
    location_html = cls._escape_html_with_breaks(finding["target"] or target, " ")
    description_html = cls._escape_html(finding["description"])

    # The empty first and last entries give the fragment its leading and
    # trailing newline; an omitted optional section leaves an empty line.
    fragment_lines = [
        "",
        '<div class="finding">',
        '<table class="finding-header">',
        "<tr>",
        f'<td class="severity severity-{severity_class}"><img class="severity-icon" src="{critical_icon}" alt=""> {severity_html}</td>',
        "<td>",
        f"<h3>{title_html}</h3>",
        f"<p>{category_html} | {location_html}</p>",
        "</td>",
        "</tr>",
        "</table>",
        "<h4>Description</h4>",
        f"<p>{description_html}</p>",
        evidence_html,
        remediation_html,
        cve_html,
        "</div>",
        "",
    ]
    return "\n".join(fragment_lines)

@classmethod
def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str:
"""Generate conservative HTML/CSS that xhtml2pdf can paginate reliably."""
payload = cls._build_report_payload(task, result)
findings = payload["findings"]
severity_counts = payload["severity_counts"]

shield_icon = cls._icon_data_uri("shield", "1e3a5f")
target_icon = cls._icon_data_uri("target", "2563eb")
findings_icon = cls._icon_data_uri("findings", "0f172a")
critical_icon = cls._icon_data_uri("critical", "991b1b")
rows_icon = cls._icon_data_uri("rows", "2563eb")
clock_icon = cls._icon_data_uri("clock", "475569")

target_html = cls._escape_html_with_breaks(payload["target"], " ")

summary_markup = "".join(
Expand All @@ -304,26 +330,10 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any])
f"<tr><td><label>{cls._escape_html(item['label'])}</label><strong>{cls._escape_html(item['value'])}</strong></td></tr>"
for item in payload["scan_parameters"]
)

finding_markup = "".join(
f"""
<div class="finding">
<table class="finding-header">
<tr>
<td class="severity severity-{finding['severity'].lower()}"><img class="severity-icon" src="{critical_icon}" alt=""> {cls._escape_html(finding['severity'])}</td>
<td>
<h3>{cls._escape_html(finding['title'])}</h3>
<p>{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'], " ")}</p>
</td>
</tr>
</table>
<h4>Description</h4>
<p>{cls._escape_html(finding['description'])}</p>
{f"<h4>Evidence</h4><pre>{cls._escape_html(finding['proof'])}</pre>" if finding['proof'] else ""}
{f"<div class='remediation'><h4>Recommended action</h4><p>{cls._escape_html(finding['remediation'])}</p></div>" if finding['remediation'] else ""}
{f"<p class='meta'>CVE: {cls._escape_html(finding['cve'])}</p>" if finding['cve'] else ""}
</div>
"""
for finding in findings
cls._build_pdf_finding_markup(finding, payload["target"], critical_icon)
for finding in payload["findings"]
)

if not finding_markup:
Expand Down Expand Up @@ -546,7 +556,7 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any])

<table class="stats">
<tr>
<td><img class="stat-icon" src="{findings_icon}" alt=""><label>Total findings</label><strong>{len(findings)}</strong></td>
<td><img class="stat-icon" src="{findings_icon}" alt=""><label>Total findings</label><strong>{len(payload['findings'])}</strong></td>
<td><img class="stat-icon" src="{critical_icon}" alt=""><label>Critical</label><strong>{severity_counts['CRITICAL']}</strong></td>
<td><img class="stat-icon" src="{target_icon}" alt=""><label>High</label><strong>{severity_counts['HIGH']}</strong></td>
<td><img class="stat-icon" src="{rows_icon}" alt=""><label>Structured rows</label><strong>{len(payload['rows'])}</strong></td>
Expand Down Expand Up @@ -589,18 +599,46 @@ def generate_pdf_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> by
raise RuntimeError("Failed to render SecuScan HTML report as PDF")
return output.getvalue()

@classmethod
def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str:
    """Render one finding as the ``<article>`` card used by the web HTML report.

    Args:
        finding: Finding record. ``severity``, ``title``, ``category``,
            ``target`` and ``description`` are read with direct indexing and
            must be present; ``proof``, ``remediation`` and ``cve`` are
            optional and their sections are omitted when missing or falsy.
        target: Scan-level target shown when the finding's own target is falsy.
        critical_icon: Value placed in the severity icon's ``src`` attribute.

    Returns:
        The HTML fragment for this finding, starting and ending with a newline.
    """
    evidence_html = f"<section><h4>Evidence</h4><pre>{cls._escape_html(finding['proof'])}</pre></section>" if finding.get("proof") else ""
    remediation_html = f"<section class='remediation'><h4>Recommended action</h4><p>{cls._escape_html(finding['remediation'])}</p></section>" if finding.get("remediation") else ""
    cve_html = f"<section class='meta'><span>CVE: {cls._escape_html(finding['cve'])}</span></section>" if finding.get("cve") else ""

    severity = finding["severity"]
    # The severity ends up inside a quoted class attribute, so keep only
    # characters that are valid in a CSS class token. The usual severity names
    # pass through unchanged; anything that could close the attribute is dropped.
    severity_class = re.sub(r"[^a-z0-9_-]", "", severity.lower())
    severity_html = cls._escape_html(severity)
    title_html = cls._escape_html(finding["title"])
    category_html = cls._escape_html(finding["category"])
    location_html = cls._escape_html_with_breaks(finding["target"] or target)
    description_html = cls._escape_html(finding["description"])

    # The empty first and last entries give the fragment its leading and
    # trailing newline; an omitted optional section leaves an empty line.
    fragment_lines = [
        "",
        '<article class="finding-card">',
        '<div class="finding-top">',
        f'<span class="severity severity-{severity_class}"><img class="mini-icon" src="{critical_icon}" alt=""> {severity_html}</span>',
        '<div class="finding-heading">',
        f"<h3>{title_html}</h3>",
        f"<p>{category_html} | {location_html}</p>",
        "</div>",
        "</div>",
        '<div class="finding-body">',
        "<section>",
        "<h4>Description</h4>",
        f"<p>{description_html}</p>",
        "</section>",
        evidence_html,
        remediation_html,
        cve_html,
        "</div>",
        "</article>",
        "",
    ]
    return "\n".join(fragment_lines)

@classmethod
def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str:
"""Generate a modern HTML report suitable for direct download."""
payload = cls._build_report_payload(task, result)
findings = payload["findings"]
severity_counts = payload["severity_counts"]

shield_icon = cls._icon_data_uri("shield", "1e3a5f")
target_icon = cls._icon_data_uri("target", "2563eb")
findings_icon = cls._icon_data_uri("findings", "0f172a")
critical_icon = cls._icon_data_uri("critical", "991b1b")
rows_icon = cls._icon_data_uri("rows", "2563eb")
clock_icon = cls._icon_data_uri("clock", "475569")

target_html = cls._escape_html_with_breaks(payload["target"])

summary_markup = "".join(
Expand All @@ -610,28 +648,10 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
f"<div class=\"meta-card\"><label>{cls._escape_html(item['label'])}</label><strong>{cls._escape_html(item['value'])}</strong></div>"
for item in payload["scan_parameters"]
)

finding_markup = "".join(
f"""
<article class="finding-card">
<div class="finding-top">
<span class="severity severity-{finding['severity'].lower()}"><img class="mini-icon" src="{critical_icon}" alt=""> {cls._escape_html(finding['severity'])}</span>
<div class="finding-heading">
<h3>{cls._escape_html(finding['title'])}</h3>
<p>{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'])}</p>
</div>
</div>
<div class="finding-body">
<section>
<h4>Description</h4>
<p>{cls._escape_html(finding['description'])}</p>
</section>
{f"<section><h4>Evidence</h4><pre>{cls._escape_html(finding['proof'])}</pre></section>" if finding['proof'] else ""}
{f"<section class='remediation'><h4>Recommended action</h4><p>{cls._escape_html(finding['remediation'])}</p></section>" if finding['remediation'] else ""}
{f"<section class='meta'><span>CVE: {cls._escape_html(finding['cve'])}</span></section>" if finding['cve'] else ""}
</div>
</article>
"""
for finding in findings
cls._build_web_finding_markup(finding, payload["target"], critical_icon)
for finding in payload["findings"]
)

if not finding_markup:
Expand Down Expand Up @@ -914,7 +934,7 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
</div>

<div class="stat-grid">
<div class="stat-card" style="--accent: #0f172a;"><div class="stat-card-header"><label>Total findings</label><span class="card-icon"><img src="{findings_icon}" alt=""></span></div><strong>{len(findings)}</strong></div>
<div class="stat-card" style="--accent: #0f172a;"><div class="stat-card-header"><label>Total findings</label><span class="card-icon"><img src="{findings_icon}" alt=""></span></div><strong>{len(payload['findings'])}</strong></div>
<div class="stat-card" style="--accent: #991b1b;"><div class="stat-card-header"><label>Critical</label><span class="card-icon"><img src="{critical_icon}" alt=""></span></div><strong>{severity_counts['CRITICAL']}</strong></div>
<div class="stat-card" style="--accent: #dc2626;"><div class="stat-card-header"><label>High</label><span class="card-icon"><img src="{target_icon}" alt=""></span></div><strong>{severity_counts['HIGH']}</strong></div>
<div class="stat-card" style="--accent: #2563eb;"><div class="stat-card-header"><label>Structured rows</label><span class="card-icon"><img src="{rows_icon}" alt=""></span></div><strong>{len(payload['rows'])}</strong></div>
Expand Down Expand Up @@ -976,13 +996,58 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st
)
return output.getvalue()

@classmethod
def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str:
    """Derive a stable, deterministic SARIF rule ID for a finding.

    Candidates are tried in this order and the first non-blank string wins:

    1. ``cve`` (finding only);
    2. ``cwe``;
    3. ``check_id``, ``plugin_rule_id``, ``rule_id``, ``id``;
    4. ``title``.

    For steps 2 and 3 the finding's own value is used when truthy, otherwise
    the value under the same key in ``finding["metadata"]``. If nothing
    qualifies, ``"security-finding"`` is used.

    The chosen value is then normalised: every run of characters outside
    ``[a-zA-Z0-9]`` becomes a single hyphen, the result is lower-cased, and
    leading/trailing hyphens are removed.

    Args:
        finding: Finding record; all keys are optional.

    Returns:
        The normalised rule ID, never empty.
    """
    metadata = finding.get("metadata")
    if not isinstance(metadata, dict):
        # A present-but-null (or otherwise malformed) metadata field must not
        # abort report generation; treat it as "no metadata".
        metadata = {}

    candidates = [finding.get("cve")]
    candidates.extend(
        finding.get(key) or metadata.get(key)
        for key in ("cwe", "check_id", "plugin_rule_id", "rule_id", "id")
    )
    candidates.append(finding.get("title"))

    # Non-string and blank candidates are skipped rather than passed to re.sub.
    raw_rule_id = next(
        (value.strip() for value in candidates if isinstance(value, str) and value.strip()),
        "security-finding",
    )

    # Substitute before lower-casing so that only ASCII letters and digits
    # survive, regardless of how non-ASCII characters would case-fold.
    rule_id = re.sub(r"[^a-zA-Z0-9]+", "-", raw_rule_id).lower().strip("-")
    return rule_id or "security-finding"

@classmethod
def _extract_sarif_locations(cls, finding: Dict[str, Any], default_target: str) -> List[Dict[str, Any]]:
    """Build the SARIF ``locations`` list for a finding.

    The finding's own ``target`` is used when truthy, otherwise
    ``default_target``. A target of the form ``<path>:<line>`` that contains no
    ``://`` is split into an artifact URI and a ``region.startLine``; any other
    target is emitted unchanged as the artifact URI.

    Args:
        finding: Finding record; ``target`` is optional.
        default_target: Scan-level target used when the finding has none.

    Returns:
        A list with one location dict, or an empty list when no target is
        available.
    """
    target = finding.get("target") or default_target
    if not target:
        return []

    physical_location: Dict[str, Any] = {"artifactLocation": {"uri": target}}

    # A target containing "://" is a URL: a trailing ":<digits>" there would be
    # a port, not a line number, so it is left untouched.
    if "://" not in target:
        path, separator, line_text = target.rpartition(":")
        # isascii() guards int(): str.isdigit() alone also accepts characters
        # such as superscript digits, which int() rejects with ValueError.
        if separator and path and line_text.isascii() and line_text.isdigit():
            line_number = int(line_text)
            # SARIF line numbers are 1-based, so ":0" is not a line reference.
            if line_number >= 1:
                physical_location["artifactLocation"]["uri"] = path
                physical_location["region"] = {"startLine": line_number}

    return [{"physicalLocation": physical_location}]

@classmethod
def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str:
"""Generate a SARIF v2.1.0 report for GitHub Code Scanning."""
payload = cls._build_report_payload(task, result)
tool_name = payload["tool_name"]

# Define severity mapping to SARIF levels
severity_map = {
"CRITICAL": "error",
"HIGH": "error",
Expand All @@ -996,37 +1061,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
results = []

for finding in payload["findings"]:
# Derive a stable, deterministic rule ID from finding-specific identifiers
raw_rule_id = None

# 1. Check CVE
cve = finding.get("cve")
if cve and isinstance(cve, str) and cve.strip():
raw_rule_id = cve.strip()

# 2. Check CWE (direct or in metadata)
if not raw_rule_id:
cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe")
if cwe and isinstance(cwe, str) and cwe.strip():
raw_rule_id = cwe.strip()

# 3. Check specific check/plugin/finding identifiers
if not raw_rule_id:
for key in ["check_id", "plugin_rule_id", "rule_id", "id"]:
val = finding.get(key) or finding.get("metadata", {}).get(key)
if val and isinstance(val, str) and val.strip():
raw_rule_id = val.strip()
break

# 4. Fallback to sanitized title
if not raw_rule_id:
raw_rule_id = finding.get("title") or "security-finding"

# Sanitize raw rule ID (lowercase, replace non-alphanumeric with hyphens)
rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower()
rule_id = re.sub(r"-+", "-", rule_id).strip("-")
if not rule_id:
rule_id = "security-finding"
rule_id = cls._extract_sarif_rule_id(finding)

if rule_id not in rule_indices:
rule_indices[rule_id] = len(rules)
Expand Down Expand Up @@ -1054,34 +1089,8 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
"text": finding.get("description", "Security finding detected")
},
"level": severity_map.get(finding["severity"], "note"),
"locations": []
"locations": cls._extract_sarif_locations(finding, payload["target"])
}

# Attempt to extract location if available
target = finding.get("target") or payload["target"]
# Check if target looks like a file path or URI
if target:
is_url = "://" in target or target.startswith(("http://", "https://"))

location = {
"physicalLocation": {
"artifactLocation": {
"uri": target
}
}
}

# If target has a line number like file.py:123 and is NOT a web URL
if not is_url and ":" in target:
parts = target.split(":")
if parts[-1].isdigit():
location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1])
location["physicalLocation"]["region"] = {
"startLine": int(parts[-1])
}

sarif_result["locations"].append(location)

results.append(sarif_result)

sarif_output = {
Expand Down
80 changes: 80 additions & 0 deletions testing/backend/unit/test_reporting_parity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import json
import pytest
from backend.secuscan.reporting import reporting

@pytest.fixture
def sample_scan_data():
    """Provide a ``(task, result)`` pair for one completed scan with a single HIGH finding."""
    xss_finding = dict(
        title="Reflected Cross-Site Scripting",
        severity="HIGH",
        category="Injection",
        description="User input is reflected without sanitization.",
        remediation="Escape all user-supplied input.",
        # Raw markup on purpose: the report tests check that it gets escaped.
        proof="<script>alert(1)</script>",
        cve="CVE-2024-0001",
        target="https://example.com/search",
    )
    scan_task = dict(
        id="task_abc123",
        tool_name="TestScanner",
        target="https://example.com",
        status="completed",
        created_at="2026-05-31T12:00:00Z",
        preset="default",
    )
    scan_result = dict(
        findings=[xss_finding],
        summary=["Found 1 high severity issue."],
        structured=dict(rows=[]),
    )
    return scan_task, scan_result

def test_sarif_output_parity(sample_scan_data):
    """Check the SARIF document generated for the sample scan.

    Covers the schema version, the tool name, the single rule derived from the
    finding's CVE, and the single result with its level, message and location.
    """
    task, result = sample_scan_data
    document = json.loads(reporting.generate_sarif_report(task, result))

    # Top-level schema version and tool identity.
    assert document["version"] == "2.1.0"
    run = document["runs"][0]
    driver = run["tool"]["driver"]
    assert driver["name"] == "TestScanner"

    # Exactly one rule, identified by the normalised CVE.
    rule_list = driver["rules"]
    assert len(rule_list) == 1
    only_rule = rule_list[0]
    assert only_rule["id"] == "cve-2024-0001"
    assert only_rule["name"] == "Reflected Cross-Site Scripting"

    # Exactly one result, pointing back at that rule and at the finding's URL.
    result_list = run["results"]
    assert len(result_list) == 1
    only_result = result_list[0]
    assert only_result["ruleId"] == "cve-2024-0001"
    assert only_result["level"] == "error"
    assert only_result["message"]["text"] == "User input is reflected without sanitization."
    artifact = only_result["locations"][0]["physicalLocation"]["artifactLocation"]
    assert artifact["uri"] == "https://example.com/search"

def test_html_web_output_parity(sample_scan_data):
    """Check that the web HTML report contains every piece of the sample finding."""
    task, result = sample_scan_data
    html_str = reporting.generate_html_report(task, result)

    expected_fragments = (
        "Reflected Cross-Site Scripting",
        "severity-high",
        "User input is reflected without sanitization.",
        # The evidence must arrive HTML-escaped inside its <pre> element.
        "<pre>&lt;script&gt;alert(1)&lt;/script&gt;</pre>",
        "Escape all user-supplied input.",
        "CVE-2024-0001",
    )
    for fragment in expected_fragments:
        assert fragment in html_str

def test_html_pdf_output_parity(sample_scan_data):
    """Check that the PDF-oriented HTML contains every piece of the sample finding.

    The intermediate HTML is inspected instead of the rendered PDF so the
    markup can be matched as plain text. The same finding details are checked
    as in the web HTML test, plus the table-based finding header that only the
    PDF variant uses.
    """
    task, result = sample_scan_data

    pdf_html_str = reporting._generate_pdf_html_report(task, result)

    # Table-based layout specific to the PDF markup.
    assert "<table class=\"finding-header\">" in pdf_html_str
    assert "Reflected Cross-Site Scripting" in pdf_html_str
    assert "User input is reflected without sanitization." in pdf_html_str

    # Same finding details the web HTML test checks.
    assert "severity-high" in pdf_html_str
    # The evidence must arrive HTML-escaped inside its <pre> element.
    assert "<pre>&lt;script&gt;alert(1)&lt;/script&gt;</pre>" in pdf_html_str
    assert "Escape all user-supplied input." in pdf_html_str
    assert "CVE-2024-0001" in pdf_html_str
Loading