diff --git a/app/rendering/pipeline.py b/app/rendering/pipeline.py
index 9b3a4f9..fc0c6b8 100644
--- a/app/rendering/pipeline.py
+++ b/app/rendering/pipeline.py
@@ -1,16 +1,47 @@
"""Orchestrate the full report → PDF pipeline."""
from __future__ import annotations
+import html
+import re
from html.parser import HTMLParser
from pathlib import Path
from ..reporting import ReportTemplate
+from ..reporting.evidence import collect_evidence
from .chromium import render_to_html
from .resources import build
from .weasyprint import render_to_pdf
BUNDLE = Path(__file__).parents[2] / "packages" / "rendering" / "dist" / "bundle.js"
+# Matches Ghostwriter's old-dot-var syntax: {{.name}}, {{.ref name}}, {{.caption name}}
+# Group 1 captures the text between the leading dot and the closing braces,
+# minus trailing whitespace; brace characters cannot appear inside a tag.
+_GW_TAG_RE = re.compile(r"\{\{\s*\.([^\{\}]*?)\s*\}\}")
+
+# Matches Ghostwriter's newer TinyMCE richtext evidence div:
+#   <div class="richtext-evidence" data-evidence-id="123"></div>
+# Attribute order may vary; this handles both orderings.
+# Exactly one of groups 1 and 2 holds the numeric evidence id, depending on
+# which alternative matched.
+# NOTE(review): the element markup had been stripped from this hunk and is
+# reconstructed here as an empty <div>…</div> — confirm against the commit.
+_GW_RICHTEXT_RE = re.compile(
+    r'<div[^>]*\bclass="richtext-evidence"[^>]*\bdata-evidence-id="(\d+)"[^>]*>\s*</div>'
+    r'|'
+    r'<div[^>]*\bdata-evidence-id="(\d+)"[^>]*\bclass="richtext-evidence"[^>]*>\s*</div>'
+)
+
+# Rich-text finding fields that Ghostwriter allows inline evidence in.
+# "title" is intentionally excluded — it is plain text, never richtext.
+# report.extra_fields are also excluded; Ghostwriter does not support inline
+# evidence there, so we leave those fields untouched.
+# make_vue_data walks this tuple and rewrites each string-valued field it
+# finds on a finding; non-string values are skipped.
+_FINDING_TEXT_FIELDS = (
+ "affected_entities",
+ "description",
+ "impact",
+ "mitigation",
+ "recommendation",
+ "replication_steps",
+ "host_detection_techniques",
+ "network_detection_techniques",
+ "references",
+)
+
_SEVERITY: dict[str, tuple[int, str]] = {
"critical": (1, "critical"),
"high": (2, "high"),
@@ -20,6 +51,105 @@
"info": (5, "info"),
}
+def _build_evidence_index(report_json: dict) -> tuple[dict[str, dict], dict[int, dict]]:
+    """Index every evidence object found in the report JSON.
+
+    Returns a ``(by_name, by_id)`` pair:
+        by_name: friendly_name → evidence object (for {{.name}} tags)
+        by_id:   numeric id → evidence object (for richtext-evidence divs)
+
+    Evidence without a non-empty string ``friendly_name`` is reachable by id
+    only. The stored values are the original dicts, not copies.
+    """
+    name_lookup: dict[str, dict] = {}
+    id_lookup: dict[int, dict] = {}
+    for item in collect_evidence(report_json):
+        id_lookup[item["id"]] = item
+        label = item.get("friendly_name")
+        if label and isinstance(label, str):
+            name_lookup[label] = item
+    return name_lookup, id_lookup
+
+
+def _resolve_inline_evidence(text: str, ev_index: dict[str, dict]) -> tuple[str, set[int]]:
+    """Replace Ghostwriter inline evidence tags in an HTML field.
+
+    Ghostwriter stores three tag forms in rich-text fields that are shipped as-is
+    in the generateReport JSON (Jinja2 is not applied for JSON export):
+
+        {{.friendly_name}}          → inline evidence image
+        {{.ref friendly_name}}      → text reference (caption, else friendly name)
+        {{.caption friendly_name}}  → caption label for the figure
+
+    Args:
+        text: HTML fragment to scan.
+        ev_index: friendly_name → evidence object lookup.
+
+    Returns (resolved_text, set of evidence IDs that were embedded as images).
+    The text comes back untouched when the index is empty or it contains no
+    "{{"; plain tags naming unknown evidence (or evidence without a path) are
+    left unchanged.
+    """
+    if not ev_index or not text or "{{" not in text:
+        return text, set()
+
+    used_ids: set[int] = set()
+
+    def _replace(m: re.Match) -> str:
+        contents = m.group(1).strip()
+
+        # {{.ref name}} and {{.caption name}} resolve to the same escaped label
+        # and never embed an image, so they share one code path.
+        for prefix in ("ref ", "caption "):
+            if contents.startswith(prefix):
+                name = contents[len(prefix):].strip()
+                ev = ev_index.get(name)
+                label = (ev.get("caption") or ev.get("friendly_name")) if ev else None
+                return html.escape(label or name)
+
+        # Plain {{.name}} → inline evidence image wrapped in figure/figcaption
+        ev = ev_index.get(contents)
+        if ev and ev.get("path"):
+            used_ids.add(ev["id"])
+            caption = html.escape(ev.get("caption") or ev.get("friendly_name") or contents)
+            path = html.escape(ev["path"])
+            # NOTE(review): the tag markup had been stripped from this hunk;
+            # rebuilt as a bare figure/img/figcaption — confirm attributes
+            # against the original commit.
+            return (
+                f'<figure>'
+                f'<img src="{path}" alt="{caption}">'
+                f'<figcaption>{caption}</figcaption>'
+                f'</figure>'
+            )
+        return m.group(0)  # unknown name — leave unchanged
+
+    return _GW_TAG_RE.sub(_replace, text), used_ids
+
+
+def _resolve_richtext_evidence(text: str, ev_by_id: dict[int, dict]) -> tuple[str, set[int]]:
+    """Replace Ghostwriter richtext-evidence divs with figure/img/figcaption.
+
+    Newer Ghostwriter TinyMCE versions store inline evidence as:
+        <div class="richtext-evidence" data-evidence-id="123"></div>
+    instead of the older {{.friendly_name}} tag syntax.
+
+    Args:
+        text: HTML fragment to scan.
+        ev_by_id: numeric evidence id → evidence object lookup.
+
+    Returns (resolved_text, set of evidence IDs that were embedded as images).
+    The text comes back untouched when the index is empty or it does not
+    mention "richtext-evidence"; divs whose id is unknown (or whose evidence
+    has no path) are left unchanged.
+    """
+    if not ev_by_id or not text or 'richtext-evidence' not in text:
+        return text, set()
+
+    used_ids: set[int] = set()
+
+    def _replace(m: re.Match) -> str:
+        # The pattern has one capture group per attribute ordering; only the
+        # alternative that matched fills its group.
+        eid_str = m.group(1) or m.group(2)
+        ev = ev_by_id.get(int(eid_str))
+        if ev and ev.get("path"):
+            used_ids.add(ev["id"])
+            caption = html.escape(ev.get("caption") or ev.get("friendly_name") or eid_str)
+            path = html.escape(ev["path"])
+            # NOTE(review): the tag markup had been stripped from this hunk;
+            # rebuilt as a bare figure/img/figcaption — confirm attributes
+            # against the original commit.
+            return (
+                f'<figure>'
+                f'<img src="{path}" alt="{caption}">'
+                f'<figcaption>{caption}</figcaption>'
+                f'</figure>'
+            )
+        return m.group(0)  # unknown id — leave unchanged
+
+    return _GW_RICHTEXT_RE.sub(_replace, text), used_ids
+
+
class _TextExtractor(HTMLParser):
def __init__(self):
super().__init__()
@@ -61,6 +191,8 @@ def make_vue_data(raw: dict) -> dict:
normalised to None so templates can use a simple truthiness check to
conditionally render optional sections.
"""
+ ev_by_name, ev_by_id = _build_evidence_index(raw)
+
findings = []
for f in raw.get("findings") or []:
f = dict(f)
@@ -72,6 +204,22 @@ def make_vue_data(raw: dict) -> dict:
"score": float(f.get("cvss_score") or 0),
"vector": f.get("cvss_vector") or "n/a",
}
+ if ev_by_name or ev_by_id:
+ inline_ids: set[int] = set()
+ for field in _FINDING_TEXT_FIELDS:
+ raw_val = f.get(field)
+ if not isinstance(raw_val, str):
+ continue
+ resolved = raw_val
+ if "{{" in resolved:
+ resolved, used = _resolve_inline_evidence(resolved, ev_by_name)
+ inline_ids |= used
+ if "richtext-evidence" in resolved:
+ resolved, used = _resolve_richtext_evidence(resolved, ev_by_id)
+ inline_ids |= used
+ f[field] = resolved
+ if inline_ids and isinstance(f.get("evidence"), list):
+ f["evidence"] = [ev for ev in f["evidence"] if ev.get("id") not in inline_ids]
findings.append(f)
report = dict(raw)
@@ -107,5 +255,5 @@ def render_report(
bundle_js = BUNDLE.read_text("utf-8")
resources = build(template, report_json)
- html = render_to_html(data, template_html, css, bundle_js, language, resources)
- return render_to_pdf(html, resources)
+ rendered_html = render_to_html(data, template_html, css, bundle_js, language, resources)
+ return render_to_pdf(rendered_html, resources)
diff --git a/app/reporting/evidence.py b/app/reporting/evidence.py
index 79b0043..e24d46f 100644
--- a/app/reporting/evidence.py
+++ b/app/reporting/evidence.py
@@ -18,23 +18,32 @@ def local_path(evidence_path: str) -> Path:
return _EVIDENCE_DIR / Path(evidence_path).relative_to("evidence")
-def collect_paths(obj: object) -> dict[str, int]:
+def collect_evidence(obj: object) -> list[dict]:
"""Recursively find all evidence objects in the report JSON.
- Returns a mapping of path -> evidence_id, e.g. {"evidence/2/foo.png": 3}.
+ An evidence object is any dict with a 'path' starting with 'evidence/'
+ and an integer 'id'.
+
+ Returns the matching dicts themselves (not copies) in traversal order;
+ the values of a matching dict are searched as well.
"""
- paths: dict[str, int] = {}
+ found: list[dict] = []
if isinstance(obj, dict):
p = obj.get("path")
eid = obj.get("id")
if isinstance(p, str) and p.startswith("evidence/") and isinstance(eid, int):
- paths[p] = eid
+ found.append(obj)
for v in obj.values():
- paths |= collect_paths(v)
+ found.extend(collect_evidence(v))
elif isinstance(obj, list):
for item in obj:
- paths |= collect_paths(item)
- return paths
+ found.extend(collect_evidence(item))
+ return found
+
+
+def collect_paths(obj: object) -> dict[str, int]:
+    """Map every evidence path in the report JSON to its evidence id.
+
+    Built on top of collect_evidence; the result looks like
+    {"evidence/2/foo.png": 3}. If the same path occurs more than once, the
+    id of the last occurrence wins.
+    """
+    mapping: dict[str, int] = {}
+    for evidence in collect_evidence(obj):
+        mapping[evidence["path"]] = evidence["id"]
+    return mapping
def _fetch_and_save(client: GhostwriterClient, evidence_id: int, path: str, media_path: Path | None) -> tuple[str, bool]:
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..9777643
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,208 @@
+import pytest
+
+from app.rendering.pipeline import (
+ _build_evidence_index,
+ _resolve_inline_evidence,
+ _resolve_richtext_evidence,
+ make_vue_data,
+)
+
+# Shared evidence fixtures. _EV1 carries an explicit caption; _EV2's caption
+# is None, in which case the resolvers use friendly_name as the label text.
+_EV1 = {
+ "id": 1,
+ "path": "evidence/1/shot.png",
+ "friendly_name": "login_page",
+ "caption": "Login page screenshot",
+}
+_EV2 = {
+ "id": 2,
+ "path": "evidence/2/admin.png",
+ "friendly_name": "admin_panel",
+ "caption": None,
+}
+# Minimal report shape: one finding that owns both fixtures.
+_REPORT = {"findings": [{"evidence": [_EV1, _EV2]}]}
+
+
+class TestBuildEvidenceIndex:
+    """Checks on the name/id lookup tables built by _build_evidence_index."""
+
+    def test_indexes_by_name_and_id(self):
+        # Both lookups must hand back the very same dict objects.
+        name_lookup, id_lookup = _build_evidence_index(_REPORT)
+        for fixture in (_EV1, _EV2):
+            assert name_lookup[fixture["friendly_name"]] is fixture
+            assert id_lookup[fixture["id"]] is fixture
+
+    def test_empty_report_returns_empty_indexes(self):
+        name_lookup, id_lookup = _build_evidence_index({})
+        assert (name_lookup, id_lookup) == ({}, {})
+
+    def test_evidence_without_friendly_name_indexed_by_id_only(self):
+        anonymous = {"id": 3, "path": "evidence/3/x.png"}
+        name_lookup, id_lookup = _build_evidence_index({"ev": anonymous})
+        assert name_lookup == {}
+        assert 3 in id_lookup
+
+
+class TestResolveInlineEvidence:
+ @pytest.fixture(autouse=True)
+ def index(self):
+ self.idx = {"login_page": _EV1, "admin_panel": _EV2}
+
+ def test_plain_name_produces_figure(self):
+ result, ids = _resolve_inline_evidence("{{.login_page}}", self.idx)
+ assert '
Login page screenshot" in result
+ assert ids == {1}
+
+ def test_plain_name_with_whitespace(self):
+ result, ids = _resolve_inline_evidence("{{. login_page }}", self.idx)
+ assert '
xss"}
+ result, ids = _resolve_inline_evidence("{{.x}}", {"x": ev})
+ assert ""}
+ div = ''
+ result, ids = _resolve_richtext_evidence(div, {5: ev})
+ assert "