From 68f6c2d807ff8097acdb60c78f62ee435ccc9377 Mon Sep 17 00:00:00 2001
From: TheGr3atJosh <90441217+TheGr3atJosh@users.noreply.github.com>
Date: Sat, 9 May 2026 21:55:06 +0200
Subject: [PATCH 1/6] feat: resolve Ghostwriter richtext-evidence divs to
inline images
Ghostwriter's TinyMCE stores inline evidence as:
  <div class="richtext-evidence" data-evidence-id="N"></div>
The previous resolver only handled the older {{.friendly_name}} tag
syntax, which is not emitted by current Ghostwriter versions. This
caused inline evidence to silently disappear from rendered PDFs.
Changes:
- _build_evidence_index now returns two indexes: by friendly_name and
by numeric id, built in a single JSON walk
- _resolve_richtext_evidence replaces richtext-evidence divs with
<figure>/<img>/<figcaption> markup using the id-based index
- Both resolvers run sequentially per field in make_vue_data; the
{{.name}} resolver is kept for backwards compatibility
Co-Authored-By: Claude Sonnet 4.6
---
app/rendering/pipeline.py | 151 +++++++++++++++++++++++++++++++++++++-
1 file changed, 149 insertions(+), 2 deletions(-)
diff --git a/app/rendering/pipeline.py b/app/rendering/pipeline.py
index 9b3a4f9..edadda8 100644
--- a/app/rendering/pipeline.py
+++ b/app/rendering/pipeline.py
@@ -1,6 +1,8 @@
"""Orchestrate the full report → PDF pipeline."""
from __future__ import annotations
+import html
+import re
from html.parser import HTMLParser
from pathlib import Path
@@ -11,6 +13,32 @@
BUNDLE = Path(__file__).parents[2] / "packages" / "rendering" / "dist" / "bundle.js"
+# Matches Ghostwriter's old-dot-var syntax: {{.name}}, {{.ref name}}, {{.caption name}}
+_GW_TAG_RE = re.compile(r"\{\{\s*\.([^\{\}]*?)\s*\}\}")
+
+# Matches Ghostwriter's newer TinyMCE richtext evidence div:
+#   <div class="richtext-evidence" data-evidence-id="123"></div>
+# Attribute order may vary; this handles both orderings.
+_GW_RICHTEXT_RE = re.compile(
+ r']*\bclass="richtext-evidence"[^>]*\bdata-evidence-id="(\d+)"[^>]*>\s*
'
+ r'|'
+ r']*\bdata-evidence-id="(\d+)"[^>]*\bclass="richtext-evidence"[^>]*>\s*
'
+)
+
+# Finding text fields that can contain inline evidence references (mirrors Ghostwriter's allowlist)
+_FINDING_TEXT_FIELDS = (
+ "title",
+ "affected_entities",
+ "description",
+ "impact",
+ "mitigation",
+ "recommendation",
+ "replication_steps",
+ "host_detection_techniques",
+ "network_detection_techniques",
+ "references",
+)
+
_SEVERITY: dict[str, tuple[int, str]] = {
"critical": (1, "critical"),
"high": (2, "high"),
@@ -20,6 +48,112 @@
"info": (5, "info"),
}
+def _build_evidence_index(report_json: dict) -> tuple[dict[str, dict], dict[int, dict]]:
+ """Walk the report JSON and return two evidence lookups:
+ by_name: friendly_name → evidence object (for {{.name}} tags)
+ by_id: numeric id → evidence object (for richtext-evidence divs)
+ """
+ by_name: dict[str, dict] = {}
+ by_id: dict[int, dict] = {}
+
+ def _walk(obj: object) -> None:
+ if isinstance(obj, dict):
+ p = obj.get("path")
+ fn = obj.get("friendly_name")
+ eid = obj.get("id")
+ if (
+ isinstance(p, str) and p.startswith("evidence/")
+ and isinstance(eid, int)
+ and isinstance(fn, str) and fn
+ ):
+ by_name[fn] = obj
+ by_id[eid] = obj
+ for v in obj.values():
+ _walk(v)
+ elif isinstance(obj, list):
+ for item in obj:
+ _walk(item)
+
+ _walk(report_json)
+ return by_name, by_id
+
+
+def _resolve_inline_evidence(text: str, ev_index: dict[str, dict]) -> str:
+ """Replace Ghostwriter inline evidence tags in an HTML field.
+
+ Ghostwriter stores three tag forms in rich-text fields that are shipped as-is
+ in the generateReport JSON (Jinja2 is not applied for JSON export):
+
+ {{.friendly_name}} → inline evidence image
+ {{.ref friendly_name}} → text reference (friendly name / caption)
+ {{.caption friendly_name}} → caption label for the figure
+
+ We replace them with HTML that the Vue renderer can handle.
+ """
+ if not ev_index or not text or "{{" not in text:
+ return text
+
+ def _replace(m: re.Match) -> str:
+ contents = m.group(1).strip()
+
+ if contents.startswith("ref "):
+ name = contents[4:].strip()
+ ev = ev_index.get(name)
+ if ev:
+ return html.escape(ev.get("caption") or ev.get("friendly_name") or name)
+ return html.escape(name)
+
+ if contents.startswith("caption "):
+ name = contents[8:].strip()
+ ev = ev_index.get(name)
+ if ev:
+ return html.escape(ev.get("caption") or ev.get("friendly_name") or name)
+ return html.escape(name)
+
+ # Plain {{.name}} → inline evidence image wrapped in figure/figcaption
+ name = contents
+ ev = ev_index.get(name)
+ if ev and ev.get("path"):
+ caption = html.escape(ev.get("caption") or ev.get("friendly_name") or name)
+ path = ev["path"]
+ return (
+ f''
+ f' '
+ f'{caption} '
+ f' '
+ )
+ return m.group(0) # unknown name — leave unchanged
+
+ return _GW_TAG_RE.sub(_replace, text)
+
+
+def _resolve_richtext_evidence(text: str, ev_by_id: dict[int, dict]) -> str:
+ """Replace Ghostwriter richtext-evidence divs with figure/img/figcaption.
+
+ Newer Ghostwriter TinyMCE versions store inline evidence as:
+ <div class="richtext-evidence" data-evidence-id="123"></div>
+ instead of the older {{.friendly_name}} tag syntax.
+ """
+ if not ev_by_id or not text or 'richtext-evidence' not in text:
+ return text
+
+ def _replace(m: re.Match) -> str:
+ eid_str = m.group(1) or m.group(2)
+ ev = ev_by_id.get(int(eid_str))
+ if ev and ev.get("path"):
+ caption = html.escape(ev.get("caption") or ev.get("friendly_name") or eid_str)
+ path = ev["path"]
+ return (
+ f''
+ f' '
+ f'{caption} '
+ f' '
+ )
+ return m.group(0) # unknown id — leave unchanged
+
+ return _GW_RICHTEXT_RE.sub(_replace, text)
+
+
class _TextExtractor(HTMLParser):
def __init__(self):
super().__init__()
@@ -61,6 +195,8 @@ def make_vue_data(raw: dict) -> dict:
normalised to None so templates can use a simple truthiness check to
conditionally render optional sections.
"""
+ ev_by_name, ev_by_id = _build_evidence_index(raw)
+
findings = []
for f in raw.get("findings") or []:
f = dict(f)
@@ -72,6 +208,17 @@ def make_vue_data(raw: dict) -> dict:
"score": float(f.get("cvss_score") or 0),
"vector": f.get("cvss_vector") or "n/a",
}
+ if ev_by_name or ev_by_id:
+ for field in _FINDING_TEXT_FIELDS:
+ raw_val = f.get(field)
+ if not isinstance(raw_val, str):
+ continue
+ resolved = raw_val
+ if "{{" in resolved:
+ resolved = _resolve_inline_evidence(resolved, ev_by_name)
+ if "richtext-evidence" in resolved:
+ resolved = _resolve_richtext_evidence(resolved, ev_by_id)
+ f[field] = resolved
findings.append(f)
report = dict(raw)
@@ -107,5 +254,5 @@ def render_report(
bundle_js = BUNDLE.read_text("utf-8")
resources = build(template, report_json)
- html = render_to_html(data, template_html, css, bundle_js, language, resources)
- return render_to_pdf(html, resources)
+ rendered_html = render_to_html(data, template_html, css, bundle_js, language, resources)
+ return render_to_pdf(rendered_html, resources)
From 0588fa77c31bf7a34a1f7ccfe98af87ff554e903 Mon Sep 17 00:00:00 2001
From: onur <67955086+otuva@users.noreply.github.com>
Date: Tue, 19 May 2026 10:16:09 +0300
Subject: [PATCH 2/6] fix: escape evidence path in img src to prevent attribute
injection
---
app/rendering/pipeline.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/app/rendering/pipeline.py b/app/rendering/pipeline.py
index edadda8..463b8e2 100644
--- a/app/rendering/pipeline.py
+++ b/app/rendering/pipeline.py
@@ -115,7 +115,7 @@ def _replace(m: re.Match) -> str:
ev = ev_index.get(name)
if ev and ev.get("path"):
caption = html.escape(ev.get("caption") or ev.get("friendly_name") or name)
- path = ev["path"]
+ path = html.escape(ev["path"])
return (
f''
f' '
@@ -142,7 +142,7 @@ def _replace(m: re.Match) -> str:
ev = ev_by_id.get(int(eid_str))
if ev and ev.get("path"):
caption = html.escape(ev.get("caption") or ev.get("friendly_name") or eid_str)
- path = ev["path"]
+ path = html.escape(ev["path"])
return (
f''
f' '
From b1089e9e1e6744f43bdee1a0b2be2ebadad0eddb Mon Sep 17 00:00:00 2001
From: onur <67955086+otuva@users.noreply.github.com>
Date: Tue, 19 May 2026 10:16:39 +0300
Subject: [PATCH 3/6] refactor: consolidate evidence JSON walking into
collect_evidence
_build_evidence_index was duplicating the same recursive walk as
collect_paths in evidence.py. Extract collect_evidence() as the shared
primitive; both callers now iterate its results instead of re-implementing
the tree walk.
---
app/rendering/pipeline.py | 28 +++++++---------------------
app/reporting/evidence.py | 23 ++++++++++++++++-------
2 files changed, 23 insertions(+), 28 deletions(-)
diff --git a/app/rendering/pipeline.py b/app/rendering/pipeline.py
index 463b8e2..ef4802b 100644
--- a/app/rendering/pipeline.py
+++ b/app/rendering/pipeline.py
@@ -7,6 +7,7 @@
from pathlib import Path
from ..reporting import ReportTemplate
+from ..reporting.evidence import collect_evidence
from .chromium import render_to_html
from .resources import build
from .weasyprint import render_to_pdf
@@ -49,32 +50,17 @@
}
def _build_evidence_index(report_json: dict) -> tuple[dict[str, dict], dict[int, dict]]:
- """Walk the report JSON and return two evidence lookups:
+ """Build two evidence lookups from the report JSON:
by_name: friendly_name → evidence object (for {{.name}} tags)
by_id: numeric id → evidence object (for richtext-evidence divs)
"""
by_name: dict[str, dict] = {}
by_id: dict[int, dict] = {}
-
- def _walk(obj: object) -> None:
- if isinstance(obj, dict):
- p = obj.get("path")
- fn = obj.get("friendly_name")
- eid = obj.get("id")
- if (
- isinstance(p, str) and p.startswith("evidence/")
- and isinstance(eid, int)
- and isinstance(fn, str) and fn
- ):
- by_name[fn] = obj
- by_id[eid] = obj
- for v in obj.values():
- _walk(v)
- elif isinstance(obj, list):
- for item in obj:
- _walk(item)
-
- _walk(report_json)
+ for ev in collect_evidence(report_json):
+ fn = ev.get("friendly_name")
+ if isinstance(fn, str) and fn:
+ by_name[fn] = ev
+ by_id[ev["id"]] = ev
return by_name, by_id
diff --git a/app/reporting/evidence.py b/app/reporting/evidence.py
index 79b0043..e24d46f 100644
--- a/app/reporting/evidence.py
+++ b/app/reporting/evidence.py
@@ -18,23 +18,32 @@ def local_path(evidence_path: str) -> Path:
return _EVIDENCE_DIR / Path(evidence_path).relative_to("evidence")
-def collect_paths(obj: object) -> dict[str, int]:
+def collect_evidence(obj: object) -> list[dict]:
"""Recursively find all evidence objects in the report JSON.
- Returns a mapping of path -> evidence_id, e.g. {"evidence/2/foo.png": 3}.
+ An evidence object is any dict with a 'path' starting with 'evidence/'
+ and an integer 'id'.
"""
- paths: dict[str, int] = {}
+ found: list[dict] = []
if isinstance(obj, dict):
p = obj.get("path")
eid = obj.get("id")
if isinstance(p, str) and p.startswith("evidence/") and isinstance(eid, int):
- paths[p] = eid
+ found.append(obj)
for v in obj.values():
- paths |= collect_paths(v)
+ found.extend(collect_evidence(v))
elif isinstance(obj, list):
for item in obj:
- paths |= collect_paths(item)
- return paths
+ found.extend(collect_evidence(item))
+ return found
+
+
+def collect_paths(obj: object) -> dict[str, int]:
+ """Recursively find all evidence objects in the report JSON.
+
+ Returns a mapping of path -> evidence_id, e.g. {"evidence/2/foo.png": 3}.
+ """
+ return {ev["path"]: ev["id"] for ev in collect_evidence(obj)}
def _fetch_and_save(client: GhostwriterClient, evidence_id: int, path: str, media_path: Path | None) -> tuple[str, bool]:
From 0ee6af39c9b00ed84fe9a7e189b880751fb4154c Mon Sep 17 00:00:00 2001
From: onur <67955086+otuva@users.noreply.github.com>
Date: Tue, 19 May 2026 10:16:54 +0300
Subject: [PATCH 4/6] fix: remove title from _FINDING_TEXT_FIELDS; document
extra_fields exclusion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
title is plain text in Ghostwriter — inline evidence tags are never stored
there, and resolving them would silently mangle any finding name that
happens to contain {{...}} syntax. Added comments to explain why both
title and extra_fields are excluded from evidence resolution.
---
app/rendering/pipeline.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/app/rendering/pipeline.py b/app/rendering/pipeline.py
index ef4802b..7407431 100644
--- a/app/rendering/pipeline.py
+++ b/app/rendering/pipeline.py
@@ -26,9 +26,11 @@
r']*\bdata-evidence-id="(\d+)"[^>]*\bclass="richtext-evidence"[^>]*>\s*
'
)
-# Finding text fields that can contain inline evidence references (mirrors Ghostwriter's allowlist)
+# Rich-text finding fields that Ghostwriter allows inline evidence in.
+# "title" is intentionally excluded — it is plain text, never richtext.
+# report.extra_fields are also excluded; Ghostwriter does not support inline
+# evidence there, so we leave those fields untouched.
_FINDING_TEXT_FIELDS = (
- "title",
"affected_entities",
"description",
"impact",
From a2cad351ad678de720e7748520acc016900bee96 Mon Sep 17 00:00:00 2001
From: onur <67955086+otuva@users.noreply.github.com>
Date: Tue, 19 May 2026 10:17:26 +0300
Subject: [PATCH 5/6] test: add unit tests for evidence index and resolution
functions
Covers _build_evidence_index, _resolve_inline_evidence, and
_resolve_richtext_evidence: happy paths, both attribute orderings for the
richtext div regex, caption/friendly_name fallback, unknown name/id
passthrough, and HTML escaping of both path and caption.
---
tests/test_pipeline.py | 138 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 138 insertions(+)
create mode 100644 tests/test_pipeline.py
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..5a3f9d3
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,138 @@
+import pytest
+
+from app.rendering.pipeline import (
+ _build_evidence_index,
+ _resolve_inline_evidence,
+ _resolve_richtext_evidence,
+)
+
+_EV1 = {
+ "id": 1,
+ "path": "evidence/1/shot.png",
+ "friendly_name": "login_page",
+ "caption": "Login page screenshot",
+}
+_EV2 = {
+ "id": 2,
+ "path": "evidence/2/admin.png",
+ "friendly_name": "admin_panel",
+ "caption": None,
+}
+_REPORT = {"findings": [{"evidence": [_EV1, _EV2]}]}
+
+
+class TestBuildEvidenceIndex:
+ def test_indexes_by_name_and_id(self):
+ by_name, by_id = _build_evidence_index(_REPORT)
+ assert by_name["login_page"] is _EV1
+ assert by_id[1] is _EV1
+ assert by_name["admin_panel"] is _EV2
+ assert by_id[2] is _EV2
+
+ def test_empty_report_returns_empty_indexes(self):
+ by_name, by_id = _build_evidence_index({})
+ assert by_name == {}
+ assert by_id == {}
+
+ def test_evidence_without_friendly_name_indexed_by_id_only(self):
+ ev = {"id": 3, "path": "evidence/3/x.png"}
+ by_name, by_id = _build_evidence_index({"ev": ev})
+ assert 3 in by_id
+ assert by_name == {}
+
+
+class TestResolveInlineEvidence:
+ @pytest.fixture(autouse=True)
+ def index(self):
+ self.idx = {"login_page": _EV1, "admin_panel": _EV2}
+
+ def test_plain_name_produces_figure(self):
+ result = _resolve_inline_evidence("{{.login_page}}", self.idx)
+ assert ' Login page screenshot " in result
+
+ def test_plain_name_with_whitespace(self):
+ result = _resolve_inline_evidence("{{. login_page }}", self.idx)
+ assert ' xss"}
+ result = _resolve_inline_evidence("{{.x}}", {"x": ev})
+ assert ""}
+ div = '
'
+ result = _resolve_richtext_evidence(div, {5: ev})
+ assert ""}
- result = _resolve_inline_evidence("{{.x}}", {"x": ev})
+ result, ids = _resolve_inline_evidence("{{.x}}", {"x": ev})
assert ""}
div = '
'
- result = _resolve_richtext_evidence(div, {5: ev})
+ result, ids = _resolve_richtext_evidence(div, {5: ev})
assert "