From 206ea6f155716ac4e4ce4f63efc9fb89e477b34d Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Fri, 29 May 2026 22:06:57 +0530 Subject: [PATCH 01/15] Refactor report generation and markup structure Refactor HTML report generation to use icon mapping and improve markup structure. --- backend/secuscan/reporting.py | 371 ++++++++++++++++++---------------- 1 file changed, 196 insertions(+), 175 deletions(-) diff --git a/backend/secuscan/reporting.py b/backend/secuscan/reporting.py index fb2e8987..2afc4b81 100644 --- a/backend/secuscan/reporting.py +++ b/backend/secuscan/reporting.py @@ -284,46 +284,108 @@ def _format_timestamp(value: str) -> str: return value @classmethod + def _build_pdf_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + evidence_html = f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding['proof'] else "" + remediation_html = f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding['remediation'] else "" + cve_html = f"

CVE: {cls._escape_html(finding['cve'])}

" if finding['cve'] else "" + + return f""" +
+ + + + + +
{cls._escape_html(finding['severity'])} +

{cls._escape_html(finding['title'])}

+

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target, " ")}

+
+

Description

+

{cls._escape_html(finding['description'])}

+ {evidence_html} + {remediation_html} + {cve_html} +
+ """ + + @classmethod + def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + evidence_html = f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding['proof'] else "" + remediation_html = f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding['remediation'] else "" + cve_html = f"
CVE: {cls._escape_html(finding['cve'])}
" if finding['cve'] else "" + + return f""" +
+
+ {cls._escape_html(finding['severity'])} +
+

{cls._escape_html(finding['title'])}

+

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

+
+
+
+
+

Description

+

{cls._escape_html(finding['description'])}

+
+ {evidence_html} + {remediation_html} + {cve_html} +
+
+ """ + + @classmethod + def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str: + raw_rule_id = None + cve = finding.get("cve") + if cve and isinstance(cve, str) and cve.strip(): + raw_rule_id = cve.strip() + + if not raw_rule_id: + cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") + if cwe and isinstance(cwe, str) and cwe.strip(): + raw_rule_id = cwe.strip() + + if not raw_rule_id: + for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: + val = finding.get(key) or finding.get("metadata", {}).get(key) + if val and isinstance(val, str) and val.strip(): + raw_rule_id = val.strip() + break + + if not raw_rule_id: + raw_rule_id = finding.get("title") or "security-finding" + + rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() + rule_id = re.sub(r"-+", "-", rule_id).strip("-") + return rule_id if rule_id else "security-finding" + +@classmethod def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate conservative HTML/CSS that xhtml2pdf can paginate reliably.""" payload = cls._build_report_payload(task, result) - findings = payload["findings"] severity_counts = payload["severity_counts"] - shield_icon = cls._icon_data_uri("shield", "1e3a5f") - target_icon = cls._icon_data_uri("target", "2563eb") - findings_icon = cls._icon_data_uri("findings", "0f172a") - critical_icon = cls._icon_data_uri("critical", "991b1b") - rows_icon = cls._icon_data_uri("rows", "2563eb") - clock_icon = cls._icon_data_uri("clock", "475569") + + icons = { + "shield": cls._icon_data_uri("shield", "1e3a5f"), + "target": cls._icon_data_uri("target", "2563eb"), + "findings": cls._icon_data_uri("findings", "0f172a"), + "critical": cls._icon_data_uri("critical", "991b1b"), + "rows": cls._icon_data_uri("rows", "2563eb"), + "clock": cls._icon_data_uri("clock", "475569") + } + target_html = cls._escape_html_with_breaks(payload["target"], " ") - - summary_markup = "".join( - f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"] - ) + summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) parameter_markup = "".join( f"{cls._escape_html(item['value'])}" for item in payload["scan_parameters"] ) + finding_markup = "".join( - f""" -
    - - - - - -
    {cls._escape_html(finding['severity'])} -

    {cls._escape_html(finding['title'])}

    -

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'], " ")}

    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    - {f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else ""} - {f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else ""} - {f"

    CVE: {cls._escape_html(finding['cve'])}

    " if finding['cve'] else ""} -
    - """ - for finding in findings + cls._build_pdf_finding_markup(finding, payload['target'], icons['critical']) + for finding in payload["findings"] ) if not finding_markup: @@ -535,7 +597,7 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any])
    - + " for item in payload["scan_parameters"] ) - + finding_markup = "".join( - cls._build_pdf_finding_markup(finding, payload['target'], icons['critical']) + cls._build_pdf_finding_markup(finding, payload['target'], icons['critical']) for finding in payload["findings"] ) - if not finding_markup: finding_markup = """
    @@ -641,12 +640,12 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) """ - @classmethod +@classmethod def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a modern HTML report suitable for direct download.""" payload = cls._build_report_payload(task, result) severity_counts = payload["severity_counts"] - + icons = { "shield": cls._icon_data_uri("shield", "1e3a5f"), "target": cls._icon_data_uri("target", "2563eb"), @@ -655,19 +654,18 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s "rows": cls._icon_data_uri("rows", "2563eb"), "clock": cls._icon_data_uri("clock", "475569") } - + target_html = cls._escape_html_with_breaks(payload["target"]) summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) parameter_markup = "".join( f"
    {cls._escape_html(item['value'])}
    " for item in payload["scan_parameters"] ) - + finding_markup = "".join( - cls._build_web_finding_markup(finding, payload['target'], icons['critical']) + cls._build_web_finding_markup(finding, payload['target'], icons['critical']) for finding in payload["findings"] ) - if not finding_markup: finding_markup = """
    @@ -1062,12 +1060,12 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a SARIF v2.1.0 report for GitHub Code Scanning.""" payload = cls._build_report_payload(task, result) - + severity_map = { "CRITICAL": "error", "HIGH": "error", "MEDIUM": "warning", "LOW": "note", "INFO": "note" } - + rules = [] rule_indices = {} results = [] @@ -1088,7 +1086,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> target = finding.get("target") or payload["target"] locations = [] - + if target: is_url = "://" in target or target.startswith(("http://", "https://")) location = {"physicalLocation": {"artifactLocation": {"uri": target}}} From 95185ee1d3ae715a0a2330249f58eedf430e677c Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Fri, 29 May 2026 22:28:35 +0530 Subject: [PATCH 03/15] Refactor PDF and HTML report generation methods --- backend/secuscan/reporting.py | 764 +++++++++------------------------- 1 file changed, 197 insertions(+), 567 deletions(-) diff --git a/backend/secuscan/reporting.py b/backend/secuscan/reporting.py index 95e8a4b2..06b6a0a1 100644 --- a/backend/secuscan/reporting.py +++ b/backend/secuscan/reporting.py @@ -283,85 +283,33 @@ def _format_timestamp(value: str) -> str: continue return value - @classmethod +@classmethod def _build_pdf_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + """Helper to generate HTML markup for a single PDF finding.""" evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else "" remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else "" cve_html = f"

    CVE: {cls._escape_html(finding['cve'])}

    " if finding['cve'] else "" return f"""
    -
    SecuScan security export

    {target_html}

    @@ -546,17 +608,17 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) - - - - + + + +
    {len(findings)}{severity_counts['CRITICAL']}{severity_counts['HIGH']}{len(payload['rows'])}{len(payload['findings'])}{severity_counts['CRITICAL']}{severity_counts['HIGH']}{len(payload['rows'])}
    -

    Executive Overview

    +

    Executive Overview

      {summary_markup}
    -

    Assessment Details

    +

    Assessment Details

    @@ -568,70 +630,42 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any])
    {cls._escape_html(payload['task_id'] or 'Unknown')}
    -

    Scan Parameters

    +

    Scan Parameters

    {parameter_markup}
    -

    Technical Findings

    +

    Technical Findings

    {finding_markup} """ - @classmethod - def generate_pdf_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> bytes: - """Generate the PDF from the same HTML used by the browser report.""" - html_report = cls._generate_pdf_html_report(task, result) - output = io.BytesIO() - pdf = pisa.CreatePDF(src=html_report, dest=output, encoding="utf-8") - if pdf.err: - raise RuntimeError("Failed to render SecuScan HTML report as PDF") - return output.getvalue() - @classmethod def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a modern HTML report suitable for direct download.""" payload = cls._build_report_payload(task, result) - findings = payload["findings"] severity_counts = payload["severity_counts"] - shield_icon = cls._icon_data_uri("shield", "1e3a5f") - target_icon = cls._icon_data_uri("target", "2563eb") - findings_icon = cls._icon_data_uri("findings", "0f172a") - critical_icon = cls._icon_data_uri("critical", "991b1b") - rows_icon = cls._icon_data_uri("rows", "2563eb") - clock_icon = cls._icon_data_uri("clock", "475569") + + icons = { + "shield": cls._icon_data_uri("shield", "1e3a5f"), + "target": cls._icon_data_uri("target", "2563eb"), + "findings": cls._icon_data_uri("findings", "0f172a"), + "critical": cls._icon_data_uri("critical", "991b1b"), + "rows": cls._icon_data_uri("rows", "2563eb"), + "clock": cls._icon_data_uri("clock", "475569") + } + target_html = cls._escape_html_with_breaks(payload["target"]) - - summary_markup = "".join( - f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"] - ) + summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) parameter_markup = "".join( f"
    {cls._escape_html(item['value'])}
    " for item in payload["scan_parameters"] ) + finding_markup = "".join( - f""" -
    -
    - {cls._escape_html(finding['severity'])} -
    -

    {cls._escape_html(finding['title'])}

    -

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'])}

    -
    -
    -
    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    -
    - {f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else ""} - {f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else ""} - {f"
    CVE: {cls._escape_html(finding['cve'])}
    " if finding['cve'] else ""} -
    -
    - """ - for finding in findings + cls._build_web_finding_markup(finding, payload['target'], icons['critical']) + for finding in payload["findings"] ) if not finding_markup: @@ -893,6 +927,54 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s }} + +
    +
    +
    +
    +
    +
    SecuScan security export
    +

    {target_html}

    +

    This report packages the most important findings, evidence, and remediation guidance from the latest assessment run into a cleaner analyst-friendly format.

    +
    +
    +
    + +
    +
    {cls._escape_html(payload['tool_name'])}
    +
    {cls._escape_html(payload['status'].upper())}
    +
    {cls._escape_html(cls._format_timestamp(payload['created_at']))}
    +
    {cls._escape_html(payload['generated_at'])}
    +
    + +
    +
    {len(payload['findings'])}
    +
    {severity_counts['CRITICAL']}
    +
    {severity_counts['HIGH']}
    +
    {len(payload['rows'])}
    +
    + +
    +

    Executive Overview

    +

    Key takeaways generated from the parsed assessment data.

    +
      {summary_markup}
    +
    + +
    +

    Scan Parameters

    +

    Runtime configuration captured for this task, including the selected Nikto flags and SecuScan preset context.

    +
    {parameter_markup}
    +
    + +
    +

    Technical Findings

    +

    Detailed finding cards with severity context, supporting evidence, and recommended next actions.

    +
    {finding_markup}
    +
    +
    + +""" +
    @@ -976,133 +1058,72 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st ) return output.getvalue() - @classmethod +@classmethod def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a SARIF v2.1.0 report for GitHub Code Scanning.""" payload = cls._build_report_payload(task, result) - tool_name = payload["tool_name"] - - # Define severity mapping to SARIF levels + severity_map = { - "CRITICAL": "error", - "HIGH": "error", - "MEDIUM": "warning", - "LOW": "note", - "INFO": "note" + "CRITICAL": "error", "HIGH": "error", "MEDIUM": "warning", + "LOW": "note", "INFO": "note" } - + rules = [] rule_indices = {} results = [] for finding in payload["findings"]: - # Derive a stable, deterministic rule ID from finding-specific identifiers - raw_rule_id = None - - # 1. Check CVE - cve = finding.get("cve") - if cve and isinstance(cve, str) and cve.strip(): - raw_rule_id = cve.strip() - - # 2. Check CWE (direct or in metadata) - if not raw_rule_id: - cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") - if cwe and isinstance(cwe, str) and cwe.strip(): - raw_rule_id = cwe.strip() - - # 3. Check specific check/plugin/finding identifiers - if not raw_rule_id: - for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: - val = finding.get(key) or finding.get("metadata", {}).get(key) - if val and isinstance(val, str) and val.strip(): - raw_rule_id = val.strip() - break - - # 4. Fallback to sanitized title - if not raw_rule_id: - raw_rule_id = finding.get("title") or "security-finding" - - # Sanitize raw rule ID (lowercase, replace non-alphanumeric with hyphens) - rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() - rule_id = re.sub(r"-+", "-", rule_id).strip("-") - if not rule_id: - rule_id = "security-finding" + rule_id = cls._extract_sarif_rule_id(finding) if rule_id not in rule_indices: rule_indices[rule_id] = len(rules) rules.append({ "id": rule_id, "name": finding.get("title", "Security Finding"), - "shortDescription": { - "text": finding.get("title", "Security Finding") - }, - "fullDescription": { - "text": finding.get("description", "No detailed description available.") - }, - "help": { - "text": finding.get("remediation", "No remediation provided.") - }, - "properties": { - "precision": "high" - } + "shortDescription": {"text": finding.get("title", "Security Finding")}, + "fullDescription": {"text": finding.get("description", "No detailed description available.")}, + "help": {"text": finding.get("remediation", "No remediation provided.")}, + "properties": {"precision": "high"} }) - sarif_result = { - "ruleId": rule_id, - "ruleIndex": rule_indices[rule_id], - "message": { - "text": finding.get("description", "Security finding detected") - }, - "level": severity_map.get(finding["severity"], "note"), - "locations": [] - } - - # Attempt to extract location if available target = finding.get("target") or payload["target"] - # Check if target looks like a file path or URI + locations = [] + if target: is_url = "://" in target or target.startswith(("http://", "https://")) + location = {"physicalLocation": {"artifactLocation": {"uri": target}}} - location = { - "physicalLocation": { - "artifactLocation": { - "uri": target - } - } - } - - # If target has a line number like file.py:123 and is NOT a web URL if not is_url and ":" in target: parts = target.split(":") if parts[-1].isdigit(): location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1]) - location["physicalLocation"]["region"] = { - "startLine": int(parts[-1]) - } - - sarif_result["locations"].append(location) + location["physicalLocation"]["region"] = {"startLine": int(parts[-1])} + locations.append(location) - results.append(sarif_result) + results.append({ + "ruleId": rule_id, + "ruleIndex": rule_indices[rule_id], + "message": {"text": finding.get("description", "Security finding detected")}, + "level": severity_map.get(finding["severity"], "note"), + "locations": locations + }) sarif_output = { "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", "version": "2.1.0", - "runs": [ - { - "tool": { - "driver": { - "name": tool_name, - "version": "1.0.0", - "informationUri": "https://github.com/utksh1/SecuScan", - "rules": rules - } - }, - "results": results - } - ] + "runs": [{ + "tool": { + "driver": { + "name": payload["tool_name"], + "version": "1.0.0", + "informationUri": "https://github.com/utksh1/SecuScan", + "rules": rules + } + }, + "results": results + }] } return json.dumps(sarif_output, indent=2) - reporting = ReportGenerator() From 2dd500302805f29ab597d930102e1627522ceade Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Fri, 29 May 2026 22:23:23 +0530 Subject: [PATCH 02/15] Refactor HTML report generation methods --- backend/secuscan/reporting.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/backend/secuscan/reporting.py b/backend/secuscan/reporting.py index 2afc4b81..95e8a4b2 100644 --- a/backend/secuscan/reporting.py +++ b/backend/secuscan/reporting.py @@ -366,7 +366,7 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) """Generate conservative HTML/CSS that xhtml2pdf can paginate reliably.""" payload = cls._build_report_payload(task, result) severity_counts = payload["severity_counts"] - + icons = { "shield": cls._icon_data_uri("shield", "1e3a5f"), "target": cls._icon_data_uri("target", "2563eb"), @@ -375,19 +375,18 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) "rows": cls._icon_data_uri("rows", "2563eb"), "clock": cls._icon_data_uri("clock", "475569") } - + target_html = cls._escape_html_with_breaks(payload["target"], " ") summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) parameter_markup = "".join( f"
    {cls._escape_html(item['value'])}
    +
    - - + + -
    {cls._escape_html(finding['severity'])} + {cls._escape_html(finding['severity'])}

    {cls._escape_html(finding['title'])}

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target, " ")}

    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    - {evidence_html} - {remediation_html} - {cve_html} + +

    Description

    +

    {cls._escape_html(finding['description'])}

    + {evidence_html} + {remediation_html} + {cve_html}
    """ @classmethod - def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: - evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else "" - remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else "" - cve_html = f"
    CVE: {cls._escape_html(finding['cve'])}
    " if finding['cve'] else "" - - return f""" -
    -
    - {cls._escape_html(finding['severity'])} -
    -

    {cls._escape_html(finding['title'])}

    -

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

    -
    -
    -
    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    -
    - {evidence_html} - {remediation_html} - {cve_html} -
    -
    - """ - - @classmethod - def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str: - raw_rule_id = None - cve = finding.get("cve") - if cve and isinstance(cve, str) and cve.strip(): - raw_rule_id = cve.strip() - - if not raw_rule_id: - cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") - if cwe and isinstance(cwe, str) and cwe.strip(): - raw_rule_id = cwe.strip() - - if not raw_rule_id: - for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: - val = finding.get(key) or finding.get("metadata", {}).get(key) - if val and isinstance(val, str) and val.strip(): - raw_rule_id = val.strip() - break - - if not raw_rule_id: - raw_rule_id = finding.get("title") or "security-finding" - - rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() - rule_id = re.sub(r"-+", "-", rule_id).strip("-") - return rule_id if rule_id else "security-finding" - -@classmethod def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate conservative HTML/CSS that xhtml2pdf can paginate reliably.""" payload = cls._build_report_payload(task, result) @@ -387,6 +335,7 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) cls._build_pdf_finding_markup(finding, payload['target'], icons['critical']) for finding in payload["findings"] ) + if not finding_markup: finding_markup = """
    @@ -401,195 +350,44 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) SecuScan Report - {cls._escape_html(payload['target'])} @@ -640,7 +438,45 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) """ -@classmethod + @classmethod + def generate_pdf_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> bytes: + """Generate the PDF from the same HTML used by the browser report.""" + html_report = cls._generate_pdf_html_report(task, result) + output = io.BytesIO() + pdf = pisa.CreatePDF(src=html_report, dest=output, encoding="utf-8") + if pdf.err: + raise RuntimeError("Failed to render SecuScan HTML report as PDF") + return output.getvalue() + + @classmethod + def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + """Helper to generate HTML markup for a single web finding.""" + evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else "" + remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else "" + cve_html = f"
    CVE: {cls._escape_html(finding['cve'])}
    " if finding['cve'] else "" + + return f""" +
    +
    + {cls._escape_html(finding['severity'])} +
    +

    {cls._escape_html(finding['title'])}

    +

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

    +
    +
    +
    +
    +

    Description

    +

    {cls._escape_html(finding['description'])}

    +
    + {evidence_html} + {remediation_html} + {cve_html} +
    +
    + """ + + @classmethod def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a modern HTML report suitable for direct download.""" payload = cls._build_report_payload(task, result) @@ -666,13 +502,14 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s cls._build_web_finding_markup(finding, payload['target'], icons['critical']) for finding in payload["findings"] ) + if not finding_markup: finding_markup = """
    -
    -

    No structured findings were available

    -

    This report finished without parsed findings. Review the raw task output in SecuScan for more detail.

    -
    +
    +

    No structured findings were available

    +

    This report finished without parsed findings. Review the raw task output in SecuScan for more detail.

    +
    """ @@ -684,243 +521,64 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s SecuScan Report - {cls._escape_html(payload['target'])} @@ -971,54 +629,6 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
    -""" - - -
    -
    -
    -
    -
    -
    SecuScan security export
    -

    {target_html}

    -

    This report packages the most important findings, evidence, and remediation guidance from the latest assessment run into a cleaner analyst-friendly format.

    -
    -
    -
    - -
    -
    {cls._escape_html(payload['tool_name'])}
    -
    {cls._escape_html(payload['status'].upper())}
    -
    {cls._escape_html(cls._format_timestamp(payload['created_at']))}
    -
    {cls._escape_html(payload['generated_at'])}
    -
    - -
    -
    {len(findings)}
    -
    {severity_counts['CRITICAL']}
    -
    {severity_counts['HIGH']}
    -
    {len(payload['rows'])}
    -
    - -
    -

    Executive Overview

    -

    Key takeaways generated from the parsed assessment data.

    - -
    - -
    -

    Scan Parameters

    -

    Runtime configuration captured for this task, including the selected Nikto flags and SecuScan preset context.

    -
    {parameter_markup}
    -
    - -
    -

    Technical Findings

    -

    Detailed finding cards with severity context, supporting evidence, and recommended next actions.

    -
    {finding_markup}
    -
    -
    - """ @classmethod @@ -1028,17 +638,7 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st output = io.StringIO() writer = csv.writer(output) writer.writerow( - [ - "Severity", - "Title", - "Category", - "Target", - "CVSS", - "CVE", - "Description", - "Evidence", - "Remediation", - ] + ["Severity", "Title", "Category", "Target", "CVSS", "CVE", "Description", "Evidence", "Remediation"] ) for finding in payload["findings"]: writer.writerow( @@ -1056,16 +656,57 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st ) return output.getvalue() -@classmethod + @classmethod + def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str: + """Extract a stable, deterministic rule ID for SARIF.""" + raw_rule_id = None + + cve = finding.get("cve") + if cve and isinstance(cve, str) and cve.strip(): + raw_rule_id = cve.strip() + + if not raw_rule_id: + cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") + if cwe and isinstance(cwe, str) and cwe.strip(): + raw_rule_id = cwe.strip() + + if not raw_rule_id: + for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: + val = finding.get(key) or finding.get("metadata", {}).get(key) + if val and isinstance(val, str) and val.strip(): + raw_rule_id = val.strip() + break + + if not raw_rule_id: + raw_rule_id = finding.get("title") or "security-finding" + + rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() + rule_id = re.sub(r"-+", "-", rule_id).strip("-") + return rule_id if rule_id else "security-finding" + + @classmethod + def _extract_sarif_locations(cls, finding: Dict[str, Any], default_target: str) -> List[Dict[str, Any]]: + """Extract location data for a SARIF finding.""" + target = finding.get("target") or default_target + if not target: + return [] + + is_url = "://" in target or target.startswith(("http://", "https://")) + location = {"physicalLocation": {"artifactLocation": {"uri": target}}} + + if not is_url and ":" in target: + parts = target.split(":") + if parts[-1].isdigit(): + location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1]) + location["physicalLocation"]["region"] = {"startLine": int(parts[-1])} + + return [location] + + @classmethod def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a SARIF v2.1.0 report for GitHub Code Scanning.""" payload = cls._build_report_payload(task, result) - severity_map = { - "CRITICAL": "error", "HIGH": "error", "MEDIUM": "warning", - "LOW": "note", "INFO": "note" - } - rules = [] rule_indices = {} results = [] @@ -1084,42 +725,31 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> "properties": {"precision": "high"} }) - target = finding.get("target") or payload["target"] - locations = [] - - if target: - is_url = "://" in target or target.startswith(("http://", "https://")) - location = {"physicalLocation": {"artifactLocation": {"uri": target}}} - - if not is_url and ":" in target: - parts = target.split(":") - if parts[-1].isdigit(): - location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1]) - location["physicalLocation"]["region"] = {"startLine": int(parts[-1])} - locations.append(location) - - results.append({ + sarif_result = { "ruleId": rule_id, "ruleIndex": rule_indices[rule_id], "message": {"text": finding.get("description", "Security finding detected")}, - "level": severity_map.get(finding["severity"], "note"), - "locations": locations - }) + "level": cls.SARIF_SEVERITY_MAP.get(finding["severity"], "note"), + "locations": cls._extract_sarif_locations(finding, payload["target"]) + } + results.append(sarif_result) sarif_output = { "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", "version": "2.1.0", - "runs": [{ - "tool": { - "driver": { - "name": payload["tool_name"], - "version": "1.0.0", - "informationUri": "https://github.com/utksh1/SecuScan", - "rules": rules - } - }, - "results": results - }] + "runs": [ + { + "tool": { + "driver": { + "name": payload["tool_name"], + "version": "1.0.0", + "informationUri": "https://github.com/utksh1/SecuScan", + "rules": rules + } + }, + "results": results + } + ] } return json.dumps(sarif_output, indent=2) From 790ec592f09a36f58fa3a58caa44f87b94948908 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Fri, 29 May 2026 22:36:55 +0530 Subject: [PATCH 04/15] Update reporting.py --- backend/secuscan/reporting.py | 706 +++++++++++++++++++++++++--------- 1 file changed, 533 insertions(+), 173 deletions(-) diff --git a/backend/secuscan/reporting.py b/backend/secuscan/reporting.py index 06b6a0a1..084cf827 100644 --- a/backend/secuscan/reporting.py +++ b/backend/secuscan/reporting.py @@ -283,31 +283,30 @@ def _format_timestamp(value: str) -> str: continue return value -@classmethod + @classmethod def _build_pdf_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: - """Helper to generate HTML markup for a single PDF finding.""" - evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else "" - remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else "" - cve_html = f"

    CVE: {cls._escape_html(finding['cve'])}

    " if finding['cve'] else "" - + evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding.get("proof") else "" + remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding.get("remediation") else "" + cve_html = f"

    CVE: {cls._escape_html(finding['cve'])}

    " if finding.get("cve") else "" + return f""" -
    - - - - - -
    {cls._escape_html(finding['severity'])} -

    {cls._escape_html(finding['title'])}

    -

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target, " ")}

    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    - {evidence_html} - {remediation_html} - {cve_html} -
    - """ +
    + + + + + +
    {cls._escape_html(finding['severity'])} +

    {cls._escape_html(finding['title'])}

    +

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target, " ")}

    +
    +

    Description

    +

    {cls._escape_html(finding['description'])}

    + {evidence_html} + {remediation_html} + {cve_html} +
    + """ @classmethod def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: @@ -315,24 +314,25 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) payload = cls._build_report_payload(task, result) severity_counts = payload["severity_counts"] - icons = { - "shield": cls._icon_data_uri("shield", "1e3a5f"), - "target": cls._icon_data_uri("target", "2563eb"), - "findings": cls._icon_data_uri("findings", "0f172a"), - "critical": cls._icon_data_uri("critical", "991b1b"), - "rows": cls._icon_data_uri("rows", "2563eb"), - "clock": cls._icon_data_uri("clock", "475569") - } + shield_icon = cls._icon_data_uri("shield", "1e3a5f") + target_icon = cls._icon_data_uri("target", "2563eb") + findings_icon = cls._icon_data_uri("findings", "0f172a") + critical_icon = cls._icon_data_uri("critical", "991b1b") + rows_icon = cls._icon_data_uri("rows", "2563eb") + clock_icon = cls._icon_data_uri("clock", "475569") target_html = cls._escape_html_with_breaks(payload["target"], " ") - summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) + + summary_markup = "".join( + f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"] + ) parameter_markup = "".join( f"{cls._escape_html(item['value'])}" for item in payload["scan_parameters"] ) finding_markup = "".join( - cls._build_pdf_finding_markup(finding, payload['target'], icons['critical']) + cls._build_pdf_finding_markup(finding, payload["target"], critical_icon) for finding in payload["findings"] ) @@ -350,51 +350,202 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) SecuScan Report - {cls._escape_html(payload['target'])}
    - +
    SecuScan security export

    {target_html}

    @@ -405,17 +556,17 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) - - - - + + + +
    {len(payload['findings'])}{severity_counts['CRITICAL']}{severity_counts['HIGH']}{len(payload['rows'])}{len(payload['findings'])}{severity_counts['CRITICAL']}{severity_counts['HIGH']}{len(payload['rows'])}
    -

    Executive Overview

    +

    Executive Overview

      {summary_markup}
    -

    Assessment Details

    +

    Assessment Details

    @@ -427,12 +578,12 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any])
    {cls._escape_html(payload['task_id'] or 'Unknown')}
    -

    Scan Parameters

    +

    Scan Parameters

    {parameter_markup}
    -

    Technical Findings

    +

    Technical Findings

    {finding_markup} @@ -450,31 +601,30 @@ def generate_pdf_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> by @classmethod def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: - """Helper to generate HTML markup for a single web finding.""" - evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding['proof'] else "" - remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding['remediation'] else "" - cve_html = f"
    CVE: {cls._escape_html(finding['cve'])}
    " if finding['cve'] else "" + evidence_html = f"

    Evidence

    {cls._escape_html(finding['proof'])}
    " if finding.get("proof") else "" + remediation_html = f"

    Recommended action

    {cls._escape_html(finding['remediation'])}

    " if finding.get("remediation") else "" + cve_html = f"
    CVE: {cls._escape_html(finding['cve'])}
    " if finding.get("cve") else "" return f""" -
    -
    - {cls._escape_html(finding['severity'])} -
    -

    {cls._escape_html(finding['title'])}

    -

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

    -
    -
    -
    -
    -

    Description

    -

    {cls._escape_html(finding['description'])}

    -
    - {evidence_html} - {remediation_html} - {cve_html} -
    -
    - """ +
    +
    + {cls._escape_html(finding['severity'])} +
    +

    {cls._escape_html(finding['title'])}

    +

    {cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

    +
    +
    +
    +
    +

    Description

    +

    {cls._escape_html(finding['description'])}

    +
    + {evidence_html} + {remediation_html} + {cve_html} +
    +
    + """ @classmethod def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: @@ -482,34 +632,35 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s payload = cls._build_report_payload(task, result) severity_counts = payload["severity_counts"] - icons = { - "shield": cls._icon_data_uri("shield", "1e3a5f"), - "target": cls._icon_data_uri("target", "2563eb"), - "findings": cls._icon_data_uri("findings", "0f172a"), - "critical": cls._icon_data_uri("critical", "991b1b"), - "rows": cls._icon_data_uri("rows", "2563eb"), - "clock": cls._icon_data_uri("clock", "475569") - } + shield_icon = cls._icon_data_uri("shield", "1e3a5f") + target_icon = cls._icon_data_uri("target", "2563eb") + findings_icon = cls._icon_data_uri("findings", "0f172a") + critical_icon = cls._icon_data_uri("critical", "991b1b") + rows_icon = cls._icon_data_uri("rows", "2563eb") + clock_icon = cls._icon_data_uri("clock", "475569") target_html = cls._escape_html_with_breaks(payload["target"]) - summary_markup = "".join(f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"]) + + summary_markup = "".join( + f"
  • {cls._escape_html(line)}
  • " for line in payload["summary"] + ) parameter_markup = "".join( f"
    {cls._escape_html(item['value'])}
    " for item in payload["scan_parameters"] ) finding_markup = "".join( - cls._build_web_finding_markup(finding, payload['target'], icons['critical']) + cls._build_web_finding_markup(finding, payload["target"], critical_icon) for finding in payload["findings"] ) if not finding_markup: finding_markup = """
    -
    -

    No structured findings were available

    -

    This report finished without parsed findings. Review the raw task output in SecuScan for more detail.

    -
    +
    +

    No structured findings were available

    +

    This report finished without parsed findings. Review the raw task output in SecuScan for more detail.

    +
    """ @@ -521,64 +672,243 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s SecuScan Report - {cls._escape_html(payload['target'])} @@ -587,7 +917,7 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
    -
    +
    SecuScan security export

    {target_html}

    @@ -597,33 +927,33 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
    -
    {cls._escape_html(payload['tool_name'])}
    -
    {cls._escape_html(payload['status'].upper())}
    -
    {cls._escape_html(cls._format_timestamp(payload['created_at']))}
    -
    {cls._escape_html(payload['generated_at'])}
    +
    {cls._escape_html(payload['tool_name'])}
    +
    {cls._escape_html(payload['status'].upper())}
    +
    {cls._escape_html(cls._format_timestamp(payload['created_at']))}
    +
    {cls._escape_html(payload['generated_at'])}
    -
    {len(payload['findings'])}
    -
    {severity_counts['CRITICAL']}
    -
    {severity_counts['HIGH']}
    -
    {len(payload['rows'])}
    +
    {len(payload['findings'])}
    +
    {severity_counts['CRITICAL']}
    +
    {severity_counts['HIGH']}
    +
    {len(payload['rows'])}
    -

    Executive Overview

    +

    Executive Overview

    Key takeaways generated from the parsed assessment data.

      {summary_markup}
    -

    Scan Parameters

    +

    Scan Parameters

    Runtime configuration captured for this task, including the selected Nikto flags and SecuScan preset context.

    {parameter_markup}
    -

    Technical Findings

    +

    Technical Findings

    Detailed finding cards with severity context, supporting evidence, and recommended next actions.

    {finding_markup}
    @@ -638,7 +968,17 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st output = io.StringIO() writer = csv.writer(output) writer.writerow( - ["Severity", "Title", "Category", "Target", "CVSS", "CVE", "Description", "Evidence", "Remediation"] + [ + "Severity", + "Title", + "Category", + "Target", + "CVSS", + "CVE", + "Description", + "Evidence", + "Remediation", + ] ) for finding in payload["findings"]: writer.writerow( @@ -706,6 +1046,15 @@ def _extract_sarif_locations(cls, finding: Dict[str, Any], default_target: str) def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a SARIF v2.1.0 report for GitHub Code Scanning.""" payload = cls._build_report_payload(task, result) + tool_name = payload["tool_name"] + + severity_map = { + "CRITICAL": "error", + "HIGH": "error", + "MEDIUM": "warning", + "LOW": "note", + "INFO": "note" + } rules = [] rule_indices = {} @@ -719,17 +1068,27 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> rules.append({ "id": rule_id, "name": finding.get("title", "Security Finding"), - "shortDescription": {"text": finding.get("title", "Security Finding")}, - "fullDescription": {"text": finding.get("description", "No detailed description available.")}, - "help": {"text": finding.get("remediation", "No remediation provided.")}, - "properties": {"precision": "high"} + "shortDescription": { + "text": finding.get("title", "Security Finding") + }, + "fullDescription": { + "text": finding.get("description", "No detailed description available.") + }, + "help": { + "text": finding.get("remediation", "No remediation provided.") + }, + "properties": { + "precision": "high" + } }) sarif_result = { "ruleId": rule_id, "ruleIndex": rule_indices[rule_id], - "message": {"text": finding.get("description", "Security finding detected")}, - "level": cls.SARIF_SEVERITY_MAP.get(finding["severity"], "note"), + "message": { + "text": finding.get("description", "Security finding detected") + }, + "level": severity_map.get(finding["severity"], "note"), "locations": cls._extract_sarif_locations(finding, payload["target"]) } results.append(sarif_result) @@ -741,7 +1100,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> { "tool": { "driver": { - "name": payload["tool_name"], + "name": tool_name, "version": "1.0.0", "informationUri": "https://github.com/utksh1/SecuScan", "rules": rules @@ -754,4 +1113,5 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> return json.dumps(sarif_output, indent=2) + reporting = ReportGenerator() From 7edca849ab54dacfc8b476384dcb5cfe213fe610 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 19:45:38 +0530 Subject: [PATCH 05/15] test(backend): add parity tests to prove refactor equivalence --- testing/backend/unit/test_reporting_parity.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 testing/backend/unit/test_reporting_parity.py diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py new file mode 100644 index 00000000..71511af4 --- /dev/null +++ b/testing/backend/unit/test_reporting_parity.py @@ -0,0 +1,80 @@ +import json +import pytest +from backend.secuscan.reporting import reporting + +@pytest.fixture +def sample_scan_data(): + task = { + "id": "task_abc123", + "tool_name": "TestScanner", + "target": "https://example.com", + "status": "completed", + "created_at": "2026-05-31T12:00:00Z", + "preset": "default" + } + result = { + "findings": [ + { + "title": "Reflected Cross-Site Scripting", + "severity": "HIGH", + "category": "Injection", + "description": "User input is reflected without sanitization.", + "remediation": "Escape all user-supplied input.", + "proof": "", + "cve": "CVE-2024-0001", + "target": "https://example.com/search" + } + ], + "summary": ["Found 1 high severity issue."], + "structured": {"rows": []} + } + return task, result + +def test_sarif_output_parity(sample_scan_data): + """Proves SARIF output maintains exact structural parity post-refactor.""" + task, result = sample_scan_data + sarif_str = reporting.generate_sarif_report(task, result) + sarif_data = json.loads(sarif_str) + + # Assert base schema and tool data parity + assert sarif_data["version"] == "2.1.0" + assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" + + # Assert rule extraction parity from the new _extract_sarif_rule_id helper + rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] + assert len(rules) == 1 + assert rules[0]["id"] == "cve-2024-0001" + assert rules[0]["name"] == "Reflected Cross-Site Scripting" + + # Assert result mapping and location parity from the new _extract_sarif_locations helper + results = sarif_data["runs"][0]["results"] + assert len(results) == 1 + assert results[0]["ruleId"] == "cve-2024-0001" + assert results[0]["level"] == "error" + assert results[0]["message"]["text"] == "User input is reflected without sanitization." + assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://example.com/search" + +def test_html_web_output_parity(sample_scan_data): + """Proves Web HTML output correctly injects modularized markup from _build_web_finding_markup.""" + task, result = sample_scan_data + html_str = reporting.generate_html_report(task, result) + + # Assert the modularized finding block rendered correctly with all data + assert "Reflected Cross-Site Scripting" in html_str + assert "severity-high" in html_str + assert "User input is reflected without sanitization." in html_str + assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity + assert "Escape all user-supplied input." in html_str + assert "CVE-2024-0001" in html_str + +def test_html_pdf_output_parity(sample_scan_data): + """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" + task, result = sample_scan_data + + # Test the internal HTML generator for the PDF to verify string parity + pdf_html_str = reporting._generate_pdf_html_report(task, result) + + # Assert table-based PDF markup rendered correctly + assert "" in pdf_html_str + assert "Reflected Cross-Site Scripting" in pdf_html_str + assert "User input is reflected without sanitization." in pdf_html_str \ No newline at end of file From 122f339bdcb5d2acc39cfa6af709f3771ccbb138 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 19:51:57 +0530 Subject: [PATCH 06/15] test(backend): add parity tests to prove refactor equivalence --- testing/backend/unit/test_reporting_parity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py index 71511af4..bd8a82e6 100644 --- a/testing/backend/unit/test_reporting_parity.py +++ b/testing/backend/unit/test_reporting_parity.py @@ -77,4 +77,4 @@ def test_html_pdf_output_parity(sample_scan_data): # Assert table-based PDF markup rendered correctly assert "
    " in pdf_html_str assert "Reflected Cross-Site Scripting" in pdf_html_str - assert "User input is reflected without sanitization." in pdf_html_str \ No newline at end of file + assert "User input is reflected without sanitization." in pdf_html_str From 277832c015a0393813b951ac111c819637478b62 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 19:55:51 +0530 Subject: [PATCH 07/15] Update test_reporting_parity.py --- testing/backend/unit/test_reporting_parity.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py index bd8a82e6..6b687eba 100644 --- a/testing/backend/unit/test_reporting_parity.py +++ b/testing/backend/unit/test_reporting_parity.py @@ -36,17 +36,14 @@ def test_sarif_output_parity(sample_scan_data): sarif_str = reporting.generate_sarif_report(task, result) sarif_data = json.loads(sarif_str) - # Assert base schema and tool data parity assert sarif_data["version"] == "2.1.0" assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" - - # Assert rule extraction parity from the new _extract_sarif_rule_id helper + rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] assert len(rules) == 1 assert rules[0]["id"] == "cve-2024-0001" assert rules[0]["name"] == "Reflected Cross-Site Scripting" - - # Assert result mapping and location parity from the new _extract_sarif_locations helper + results = sarif_data["runs"][0]["results"] assert len(results) == 1 assert results[0]["ruleId"] == "cve-2024-0001" @@ -59,22 +56,18 @@ def test_html_web_output_parity(sample_scan_data): task, result = sample_scan_data html_str = reporting.generate_html_report(task, result) - # Assert the modularized finding block rendered correctly with all data assert "Reflected Cross-Site Scripting" in html_str assert "severity-high" in html_str assert "User input is reflected without sanitization." in html_str - assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity + assert "
    <script>alert(1)</script>
    " in html_str assert "Escape all user-supplied input." in html_str assert "CVE-2024-0001" in html_str def test_html_pdf_output_parity(sample_scan_data): """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" task, result = sample_scan_data - - # Test the internal HTML generator for the PDF to verify string parity pdf_html_str = reporting._generate_pdf_html_report(task, result) - # Assert table-based PDF markup rendered correctly assert "
    " in pdf_html_str assert "Reflected Cross-Site Scripting" in pdf_html_str assert "User input is reflected without sanitization." in pdf_html_str From 289122f1ce6e04f146140bdfa6a14d9107f4d598 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:02:28 +0530 Subject: [PATCH 08/15] Refactor test_reporting_parity.py for clarity From 6801b9f7e540d5942f13090ad00c6f624d3c206d Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:05:03 +0530 Subject: [PATCH 09/15] Refactor test_reporting_parity.py for clarity From c4f372db8598188bca645fa4c5425f1a84025596 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:13:40 +0530 Subject: [PATCH 10/15] Refactor test functions for reporting parity --- testing/backend/unit/test_reporting_parity.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py index 6b687eba..a6f70722 100644 --- a/testing/backend/unit/test_reporting_parity.py +++ b/testing/backend/unit/test_reporting_parity.py @@ -36,14 +36,17 @@ def test_sarif_output_parity(sample_scan_data): sarif_str = reporting.generate_sarif_report(task, result) sarif_data = json.loads(sarif_str) + # Assert base schema and tool data parity assert sarif_data["version"] == "2.1.0" assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" + # Assert rule extraction parity from the new _extract_sarif_rule_id helper rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] assert len(rules) == 1 assert rules[0]["id"] == "cve-2024-0001" assert rules[0]["name"] == "Reflected Cross-Site Scripting" + # Assert result mapping and location parity from the new _extract_sarif_locations helper results = sarif_data["runs"][0]["results"] assert len(results) == 1 assert results[0]["ruleId"] == "cve-2024-0001" @@ -56,18 +59,22 @@ def test_html_web_output_parity(sample_scan_data): task, result = sample_scan_data html_str = reporting.generate_html_report(task, result) + # Assert the modularized finding block rendered correctly with all data assert "Reflected Cross-Site Scripting" in html_str assert "severity-high" in html_str assert "User input is reflected without sanitization." in html_str - assert "
    <script>alert(1)</script>
    " in html_str + assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity assert "Escape all user-supplied input." in html_str assert "CVE-2024-0001" in html_str def test_html_pdf_output_parity(sample_scan_data): """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" task, result = sample_scan_data + + # Test the internal HTML generator for the PDF to verify string parity pdf_html_str = reporting._generate_pdf_html_report(task, result) + # Assert table-based PDF markup rendered correctly assert "
    " in pdf_html_str assert "Reflected Cross-Site Scripting" in pdf_html_str assert "User input is reflected without sanitization." in pdf_html_str From 003a276931fcfe23bce3cb90d506c2a1dc445cf2 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:18:34 +0530 Subject: [PATCH 11/15] Refactor test_reporting_parity.py for clarity From 8097afe25f2d434f7c33ecd58e679ff196d27ac2 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Thu, 4 Jun 2026 00:03:21 +0530 Subject: [PATCH 12/15] Refactor test_reporting_parity.py for clarity From 232626afee8f95311fbf5418b4536ee997458cba Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Thu, 4 Jun 2026 00:14:31 +0530 Subject: [PATCH 13/15] Refactor test functions and restore imports --- testing/backend/unit/test_reporting_parity.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py index a6f70722..82e0dfea 100644 --- a/testing/backend/unit/test_reporting_parity.py +++ b/testing/backend/unit/test_reporting_parity.py @@ -1,7 +1,6 @@ import json import pytest from backend.secuscan.reporting import reporting - @pytest.fixture def sample_scan_data(): task = { @@ -29,23 +28,19 @@ def sample_scan_data(): "structured": {"rows": []} } return task, result - def test_sarif_output_parity(sample_scan_data): """Proves SARIF output maintains exact structural parity post-refactor.""" task, result = sample_scan_data sarif_str = reporting.generate_sarif_report(task, result) sarif_data = json.loads(sarif_str) - # Assert base schema and tool data parity assert sarif_data["version"] == "2.1.0" assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" - # Assert rule extraction parity from the new _extract_sarif_rule_id helper rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] assert len(rules) == 1 assert rules[0]["id"] == "cve-2024-0001" assert rules[0]["name"] == "Reflected Cross-Site Scripting" - # Assert result mapping and location parity from the new _extract_sarif_locations helper results = sarif_data["runs"][0]["results"] assert len(results) == 1 @@ -53,12 +48,10 @@ def test_sarif_output_parity(sample_scan_data): assert results[0]["level"] == "error" assert results[0]["message"]["text"] == "User input is reflected without sanitization." assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://example.com/search" - def test_html_web_output_parity(sample_scan_data): """Proves Web HTML output correctly injects modularized markup from _build_web_finding_markup.""" task, result = sample_scan_data html_str = reporting.generate_html_report(task, result) - # Assert the modularized finding block rendered correctly with all data assert "Reflected Cross-Site Scripting" in html_str assert "severity-high" in html_str @@ -66,14 +59,11 @@ def test_html_web_output_parity(sample_scan_data): assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity assert "Escape all user-supplied input." in html_str assert "CVE-2024-0001" in html_str - def test_html_pdf_output_parity(sample_scan_data): """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" task, result = sample_scan_data - # Test the internal HTML generator for the PDF to verify string parity pdf_html_str = reporting._generate_pdf_html_report(task, result) - # Assert table-based PDF markup rendered correctly assert "
    " in pdf_html_str assert "Reflected Cross-Site Scripting" in pdf_html_str From 4a337a17adc5c0d1e5652d3277600b7c2a0f213b Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Thu, 4 Jun 2026 00:18:03 +0530 Subject: [PATCH 14/15] Delete testing/backend/unit/test_reporting_parity.py --- testing/backend/unit/test_reporting_parity.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 testing/backend/unit/test_reporting_parity.py diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py deleted file mode 100644 index 82e0dfea..00000000 --- a/testing/backend/unit/test_reporting_parity.py +++ /dev/null @@ -1,70 +0,0 @@ -import json -import pytest -from backend.secuscan.reporting import reporting -@pytest.fixture -def sample_scan_data(): - task = { - "id": "task_abc123", - "tool_name": "TestScanner", - "target": "https://example.com", - "status": "completed", - "created_at": "2026-05-31T12:00:00Z", - "preset": "default" - } - result = { - "findings": [ - { - "title": "Reflected Cross-Site Scripting", - "severity": "HIGH", - "category": "Injection", - "description": "User input is reflected without sanitization.", - "remediation": "Escape all user-supplied input.", - "proof": "", - "cve": "CVE-2024-0001", - "target": "https://example.com/search" - } - ], - "summary": ["Found 1 high severity issue."], - "structured": {"rows": []} - } - return task, result -def test_sarif_output_parity(sample_scan_data): - """Proves SARIF output maintains exact structural parity post-refactor.""" - task, result = sample_scan_data - sarif_str = reporting.generate_sarif_report(task, result) - sarif_data = json.loads(sarif_str) - # Assert base schema and tool data parity - assert sarif_data["version"] == "2.1.0" - assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" - # Assert rule extraction parity from the new _extract_sarif_rule_id helper - rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] - assert len(rules) == 1 - assert rules[0]["id"] == "cve-2024-0001" - assert rules[0]["name"] == "Reflected Cross-Site Scripting" - # Assert result mapping and location parity from the new _extract_sarif_locations helper - results = sarif_data["runs"][0]["results"] - assert len(results) == 1 - assert results[0]["ruleId"] == "cve-2024-0001" - assert results[0]["level"] == "error" - assert results[0]["message"]["text"] == "User input is reflected without sanitization." - assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://example.com/search" -def test_html_web_output_parity(sample_scan_data): - """Proves Web HTML output correctly injects modularized markup from _build_web_finding_markup.""" - task, result = sample_scan_data - html_str = reporting.generate_html_report(task, result) - # Assert the modularized finding block rendered correctly with all data - assert "Reflected Cross-Site Scripting" in html_str - assert "severity-high" in html_str - assert "User input is reflected without sanitization." in html_str - assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity - assert "Escape all user-supplied input." in html_str - assert "CVE-2024-0001" in html_str -def test_html_pdf_output_parity(sample_scan_data): - """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" - task, result = sample_scan_data - # Test the internal HTML generator for the PDF to verify string parity - pdf_html_str = reporting._generate_pdf_html_report(task, result) - # Assert table-based PDF markup rendered correctly - assert "
    " in pdf_html_str - assert "Reflected Cross-Site Scripting" in pdf_html_str - assert "User input is reflected without sanitization." in pdf_html_str From 853d2345abbf0b06528baf74171135c78ef32a73 Mon Sep 17 00:00:00 2001 From: Rohit Kattimani <82595800+RohitKattimani@users.noreply.github.com> Date: Thu, 4 Jun 2026 00:38:51 +0530 Subject: [PATCH 15/15] test(backend): add parity tests to prove refactor equivalence add parity tests to prove refactor equivalence --- testing/backend/unit/test_reporting_parity.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 testing/backend/unit/test_reporting_parity.py diff --git a/testing/backend/unit/test_reporting_parity.py b/testing/backend/unit/test_reporting_parity.py new file mode 100644 index 00000000..4d929877 --- /dev/null +++ b/testing/backend/unit/test_reporting_parity.py @@ -0,0 +1,80 @@ +import json +import pytest +from backend.secuscan.reporting import reporting + +@pytest.fixture +def sample_scan_data(): + task = { + "id": "task_abc123", + "tool_name": "TestScanner", + "target": "https://example.com", + "status": "completed", + "created_at": "2026-05-31T12:00:00Z", + "preset": "default" + } + result = { + "findings": [ + { + "title": "Reflected Cross-Site Scripting", + "severity": "HIGH", + "category": "Injection", + "description": "User input is reflected without sanitization.", + "remediation": "Escape all user-supplied input.", + "proof": "", + "cve": "CVE-2024-0001", + "target": "https://example.com/search" + } + ], + "summary": ["Found 1 high severity issue."], + "structured": {"rows": []} + } + return task, result + +def test_sarif_output_parity(sample_scan_data): + """Proves SARIF output maintains exact structural parity post-refactor.""" + task, result = sample_scan_data + sarif_str = reporting.generate_sarif_report(task, result) + sarif_data = json.loads(sarif_str) + + # Assert base schema and tool data parity + assert sarif_data["version"] == "2.1.0" + assert sarif_data["runs"][0]["tool"]["driver"]["name"] == "TestScanner" + + # Assert rule extraction parity from the new _extract_sarif_rule_id helper + rules = sarif_data["runs"][0]["tool"]["driver"]["rules"] + assert len(rules) == 1 + assert rules[0]["id"] == "cve-2024-0001" + assert rules[0]["name"] == "Reflected Cross-Site Scripting" + + # Assert result mapping and location parity from the new _extract_sarif_locations helper + results = sarif_data["runs"][0]["results"] + assert len(results) == 1 + assert results[0]["ruleId"] == "cve-2024-0001" + assert results[0]["level"] == "error" + assert results[0]["message"]["text"] == "User input is reflected without sanitization." + assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://example.com/search" + +def test_html_web_output_parity(sample_scan_data): + """Proves Web HTML output correctly injects modularized markup from _build_web_finding_markup.""" + task, result = sample_scan_data + html_str = reporting.generate_html_report(task, result) + + # Assert the modularized finding block rendered correctly with all data + assert "Reflected Cross-Site Scripting" in html_str + assert "severity-high" in html_str + assert "User input is reflected without sanitization." in html_str + assert "
    <script>alert(1)</script>
    " in html_str # Checks HTML escaping parity + assert "Escape all user-supplied input." in html_str + assert "CVE-2024-0001" in html_str + +def test_html_pdf_output_parity(sample_scan_data): + """Proves PDF HTML output correctly injects modularized markup from _build_pdf_finding_markup.""" + task, result = sample_scan_data + + # Test the internal HTML generator for the PDF to verify string parity + pdf_html_str = reporting._generate_pdf_html_report(task, result) + + # Assert table-based PDF markup rendered correctly + assert "
    " in pdf_html_str + assert "Reflected Cross-Site Scripting" in pdf_html_str + assert "User input is reflected without sanitization." in pdf_html_str \ No newline at end of file