diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py
index c34a4bca..6e7df37f 100644
--- a/claude_code_log/html/renderer.py
+++ b/claude_code_log/html/renderer.py
@@ -943,7 +943,15 @@ def title_ToolUseMessage(
content.input, AskUserQuestionInput
) and self._paired_answer_supersedes(message):
return ""
- return super().title_ToolUseMessage(content, message)
+ # Specialized tools dispatch to a title_*Input method that escapes via
+ # ``_tool_title``. Tools with NO specialized method (generic / mcp__* /
+ # ToolSearch / custom) fall back to the raw tool name — which is
+ # attacker-controllable and lands live in the header span. Escape it
+ # here rather than in the shared base ``title_ToolUseMessage`` (the
+ # Markdown renderer must not get HTML-entity-escaped titles). #245 XSS.
+ if title := self._dispatch_title(content.input, message):
+ return title
+ return escape_html(content.tool_name)
def title_ToolResultMessage(
self, content: ToolResultMessage, message: TemplateMessage
@@ -964,6 +972,39 @@ def title_ToolResultMessage(
return f"{base} {marker}" if base else marker
return base
+ # Title overrides that escape their transcript-derived field for the HTML
+ # header span. These titles are built on the shared base ``Renderer`` (also
+ # used by the Markdown renderer, which must NOT receive HTML-entity-escaped
+ # titles), so the escaping lives on the HTML path only — mirroring how
+ # ``_tool_title`` escapes the tool name for specialized tools. #245 XSS.
+
+ def title_HookAttachmentMessage(
+ self, content: HookAttachmentMessage, _: TemplateMessage
+ ) -> str:
+ # ``hook_name`` (e.g. "PostToolUse:TaskUpdate") is transcript-derived
+ # and lands in the header; escape it.
+ label = content.hook_name or content.hook_event or content.kind
+ return f"Hook · {escape_html(label)}"
+
+ def title_WorkflowPhaseMessage(
+ self, content: WorkflowPhaseMessage, _: TemplateMessage
+ ) -> str:
+ return f"Phase: {escape_html(content.title)}" if content.title else "Phase"
+
+ def title_WorkflowAgentMessage(
+ self, content: WorkflowAgentMessage, _: TemplateMessage
+ ) -> str:
+ return f"Agent {escape_html(content.label)}" if content.label else "Agent"
+
+ def title_SystemMessage(self, content: SystemMessage, _: TemplateMessage) -> str:
+ # ``level`` is FREE-TEXT from the transcript (``system_factory``:
+ # ``transcript.level or "info"``), not an enum — so it can carry a
+ # payload that lands in the header. Title-case the RAW level FIRST,
+ # then escape: escaping first would let ``.title()`` capitalize the
+ # entity prefixes (``&lt;`` → ``&Lt;``) and break the escaping. #245 XSS.
+ level = content.level or "unknown"
+ return f"System {escape_html(level.title())}"
+
def title_TaskInput(self, input: TaskInput, message: TemplateMessage) -> str:
"""Title → '🔧 Task (subagent_type) [async #]'.
diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py
index 632fa2c6..1575c75c 100644
--- a/claude_code_log/html/utils.py
+++ b/claude_code_log/html/utils.py
@@ -424,7 +424,24 @@ def block_code(code: str, info: Optional[str] = None) -> str:
@functools.lru_cache(maxsize=1)
def _get_markdown_renderer() -> mistune.Markdown:
- """Get cached Mistune markdown renderer with Pygments syntax highlighting."""
+ """Get cached Mistune markdown renderer with Pygments syntax highlighting.
+
+ Uses ``escape=True`` so raw HTML embedded in the source text
+ (```` into the field" —
+ so rendering it unescaped lets that payload execute when the transcript
+ HTML is opened. The Markdown output path already neutralises raw HTML
+ from every source (see ``markdown/renderer.py::_protect_html_tags``);
+ the HTML path must match. ``escape=True`` does not affect Markdown
+ formatting, plugin output (Pygments, SHA links), or code fences — only
+ raw HTML tags in the body.
+ """
from ..markdown_plugins import make_codespan_sha_plugin, make_sha_plugin
from ..git_remote import resolve_sha_for_current_render
@@ -447,7 +464,7 @@ def _get_markdown_renderer() -> mistune.Markdown:
# mistune's built-in rule consumes the backticks.
make_codespan_sha_plugin(resolve_sha_for_current_render),
],
- escape=False, # Don't escape HTML since we want to render markdown properly
+ escape=True, # Escape raw HTML: transcript content is untrusted (XSS)
hard_wrap=True, # Line break for newlines (checklists in Assistant messages)
)
@@ -490,11 +507,13 @@ def render_markdown_inline(text: str) -> str:
def _get_user_markdown_renderer() -> mistune.Markdown:
"""Markdown renderer for user-authored text.
- Differs from the shared renderer in one critical way: ``escape=True``
- so a user typing raw ``` into
+the field"* lands that payload in assistant prose, a tool result, and a
+Write tool's file content. If it reaches the HTML unescaped it executes when
+the file is opened. There is no "trusted" source here.
+
+Two safe paths, depending on what you emit:
+
+- **Building HTML with f-strings/format** → run every interpolated value
+ through `escape_html()` first (as the input formatter above does with
+ `escaped_query`). Never interpolate a raw field into markup.
+- **Rendering markdown** → use `render_markdown` / `render_markdown_collapsible`.
+ Both use mistune with `escape=True`, so raw HTML tags in the body are
+ escaped to entities and unsafe link/image schemes (`javascript:`, `data:`)
+ are neutralised, while Markdown, code fences and Pygments still render.
+
+Regression coverage lives in `test/test_markdown_rendering.py` (unit) and
+`test/test_xss_browser.py` (empirical: opens the file in a real browser and
+asserts no `alert()` dialog fires). Add a payload-bearing case for any new
+field you render.
+
### Update Exports
Add functions to `__all__`:
diff --git a/dev-docs/plugins.md b/dev-docs/plugins.md
index f8136750..eb492008 100644
--- a/dev-docs/plugins.md
+++ b/dev-docs/plugins.md
@@ -212,6 +212,7 @@ renderer's dispatcher consults them after the renderer's own
from dataclasses import dataclass
from typing import ClassVar, Optional
from claude_code_log.models import DetailLevel, ToolUseMessage
+from claude_code_log.plugins import escape_html, safe_markdown_inline
@dataclass
class MyToolMessage(ToolUseMessage):
@@ -220,23 +221,34 @@ class MyToolMessage(ToolUseMessage):
detail_visibility: ClassVar[DetailLevel] = DetailLevel.LOW
def format_markdown(self, _renderer, _message) -> str:
+ # ``action`` is transcript-derived (untrusted — see §4.2). Interpolated
+ # into Markdown SOURCE, so neutralise raw HTML with safe_markdown_inline
+ # (the Markdown-output path emits this verbatim).
action = (self.input.input or {}).get("action", "?")
- return f"_(my plugin) action={action}_"
+ return f"_(my plugin) action={safe_markdown_inline(action)}_"
def format_html(self, _renderer, _message) -> Optional[str]:
return None # fall back to mistune(format_markdown)
def title(self, _renderer, _message) -> Optional[str]:
- return "✉ my plugin"
+ # A title() return goes to ``{{ message_title | safe }}`` with NO core
+ # HTML escaping — escape transcript-derived interpolation yourself
+ # (§4.2). The Markdown heading path is auto-gated by the core.
+ action = (self.input.input or {}).get("action", "?")
+ return f"✉ my plugin: {escape_html(action)}"
```
+> The constant-string forms (`"✉ my plugin"`, `"_(my plugin)_"`) need no
+> escaping — only **transcript-derived interpolation** does. See
+> [§4.2](#42-security-conscious-rendering) for the full contract.
+
Signature contract for each method:
| Method | Signature | Return | Notes |
|---|---|---|---|
-| `format_markdown` | `(self, renderer, message) -> str` | Markdown source string. | Define this whenever your class produces meaningful Markdown. Drives both Markdown output AND HTML output (via mistune) unless `format_html` is also defined. |
-| `format_html` | `(self, renderer, message) -> str` | Raw HTML string (real string — no None sentinel). | Define this ONLY when you need HTML different from mistune-of-`format_markdown`. The dispatcher synthesizes that fallback automatically when `format_html` is absent. |
-| `title` | `(self, renderer, message) -> Optional[str]` | Heading text or `None`. | Return `None` for "headless" (inline) messages. Return `""` (empty string, not None) to suppress the heading explicitly — the dispatcher distinguishes the two. |
+| `format_markdown` | `(self, renderer, message) -> str` | Markdown source string. | Define this whenever your class produces meaningful Markdown. Drives both Markdown output AND HTML output (via mistune) unless `format_html` is also defined. **Escape transcript-derived interpolation** ([§4.2](#42-security-conscious-rendering)): the Markdown-output path emits this verbatim. |
+| `format_html` | `(self, renderer, message) -> str` | Raw HTML string (real string — no None sentinel). | Define this ONLY when you need HTML different from mistune-of-`format_markdown`. The dispatcher synthesizes that fallback automatically when `format_html` is absent. **You own escaping** — the return is injected as live DOM; `escape_html` every transcript-derived interpolation ([§4.2](#42-security-conscious-rendering)). |
+| `title` | `(self, renderer, message) -> Optional[str]` | Heading text or `None`. | Return `None` for "headless" (inline) messages. Return `""` (empty string, not None) to suppress the heading explicitly — the dispatcher distinguishes the two. **`escape_html` transcript-derived interpolation** — the title is emitted via `\| safe` with no core escaping ([§4.2](#42-security-conscious-rendering)). |
**`format_html` is opt-in.** If your plugin class defines only
`format_markdown`, the HtmlRenderer dispatcher automatically
@@ -271,22 +283,27 @@ moves to the next ancestor.
### 4.1 Plugin-facing helpers
-Two helpers are re-exported from `claude_code_log.plugins` for use
-in `format_html` / `format_markdown` methods. The re-export is the
-stable plugin API; the underlying implementation in
-`claude_code_log/html/utils.py` may move or be renamed.
+Four helpers are re-exported from `claude_code_log.plugins` for use
+in `format_html` / `format_markdown` / `title` methods. The re-export is
+the stable plugin API; the underlying implementation (in
+`claude_code_log/html/utils.py` and `claude_code_log/markdown/renderer.py`)
+may move or be renamed.
```python
from claude_code_log.plugins import (
render_markdown,
render_markdown_collapsible,
+ escape_html,
+ safe_markdown_inline,
)
```
| Helper | Signature | Use when |
|---|---|---|
-| `render_markdown(text)` | `(str) -> str` | You need Markdown→HTML inside a custom `format_html` (e.g. embedding a Markdown fragment in a richer HTML scaffold). |
-| `render_markdown_collapsible(raw_content, css_class, *, line_threshold=20, preview_line_count=5)` | `(str, str, int, int) -> str` | Long Markdown bodies (mail bodies, agent responses, multi-paragraph result text). Returns inline `<div class="…">…</div>` for short content, a collapsible `<details>` with preview + full body for content exceeding `line_threshold`. |
+| `render_markdown(text)` | `(str) -> str` | You need Markdown→HTML inside a custom `format_html` (e.g. embedding a Markdown fragment in a richer HTML scaffold). Escapes raw HTML in `text` (untrusted-safe). |
+| `render_markdown_collapsible(raw_content, css_class, *, line_threshold=20, preview_line_count=5)` | `(str, str, int, int) -> str` | Long Markdown bodies (mail bodies, agent responses, multi-paragraph result text). Returns inline `<div class="…">…</div>` for short content, a collapsible `<details>` with preview + full body for content exceeding `line_threshold`. Escapes raw HTML. |
+| `escape_html(text)` | `(str) -> str` | Interpolating transcript-derived text into a `format_html` raw-HTML string, OR into a `title` return (which is emitted via `\| safe`). Entity-escapes `<`, `>`, `&`, quotes. |
+| `safe_markdown_inline(text)` | `(str) -> str` | Interpolating transcript-derived text into a Markdown **inline** fragment in `format_markdown` (a link label, a list item, inline prose) — the Markdown-output path emits `format_markdown` verbatim. Entity-escapes raw HTML tags while preserving Markdown formatting. |
The reference plugin's
[`tool_communicate_result.py`](../test/_plugins/clmail/src/claude_code_log_clmail_test/transformers/tool_communicate_result.py)
@@ -297,6 +314,59 @@ Add to `claude_code_log.plugins.__all__` only on concrete plugin-author
demand — every entry is an API commitment. Open an issue if a helper
you need isn't exposed.
+### 4.2 Security-conscious rendering
+
+**Every transcript-derived value is untrusted.** A transcript is not a
+trusted document: the assistant routinely echoes arbitrary user / file /
+web input verbatim ("write a test that types `<script>alert(1)</script>`
+into the field"), and tool names, hook names, `cwd`-derived project names,
+session summaries and the like are all attacker-influenceable. There is no
+"trusted source" — if a value came from the transcript, escape it before it
+reaches a rendered position. A plugin that interpolates transcript data into
+its render methods reproduces the exact XSS sink class that issue #245 closed
+in the core.
+
+**Helper per context** — pick by *where the value lands*, covering **both**
+output paths:
+
+| Context (where the value lands) | Helper |
+|---|---|
+| `format_html` raw-HTML f-string (text or attribute) | `escape_html(value)` |
+| `format_html` embedding a Markdown body | `render_markdown(value)` / `render_markdown_collapsible(value, …)` |
+| `format_markdown` inline fragment (link label, list item, inline prose) | `safe_markdown_inline(value)` |
+| `title()` return | `escape_html(value)` — the HTML header emits it via `\| safe` (no core escaping); the Markdown heading is auto-gated by the core |
+
+Notes:
+
+- **Don't double-escape.** Pass a value through *one* helper for its context.
+ Don't `escape_html` a value and then also feed it to `render_markdown`
+ (you'd get visible `&lt;` entities). A constant string needs no helper.
+- **`format_markdown` HTML safety is automatic; Markdown safety is not.**
+ When only `format_markdown` is defined, the HTML path runs it through
+ `render_markdown` (escapes raw HTML for you). The Markdown-output path emits
+ `format_markdown` verbatim, so inline transcript interpolation there needs
+ `safe_markdown_inline`.
+
+**Sink-class self-audit checklist** — scan your render methods for:
+
+- a raw f-string interpolating transcript data into HTML (`format_html`)
+- transcript data interpolated into a `title()` return
+- transcript data in a Markdown heading or inline link/list label
+ (`format_markdown`)
+- anything you emit into a `\| safe` / non-autoescaped context
+
+**Prefer one structural gate over per-site escaping.** The #245 class took
+several review rounds precisely because each sink was individually "known" but
+no single chokepoint enforced neutralisation, so new sinks kept slipping in.
+The core now routes every Markdown heading/label through one
+`safe_markdown_inline` gate. Apply the same principle in your plugin: if you
+build several rendered strings from transcript data, funnel them through one
+helper at one place rather than remembering to escape at each call site.
+
+For the core-renderer view of this contract (the per-field escaping the host
+renderer applies, and the title/markdown gate internals), see
+[implementing-a-tool-renderer.md](implementing-a-tool-renderer.md).
+
---
## 5. Dispatch resolution order
@@ -573,6 +643,7 @@ own plugin should follow the same shape:
| **2. Dispatch matrix** | Renderer-side vs class-side resolution; HTML vs Markdown output | Skip unless your plugin does something exotic with the dispatcher. |
| **3. Transformer integration** | End-to-end: real `MessageContent` through your `transform()` and class-side render methods | Always write this. Drive your transformer with hand-built `MessageMeta.empty()` candidates; assert the replacement is an instance of your subclass and that the render methods return the expected text. |
| **4. Text-equivalence** | If your plugin reads `UserTextMessage.items`, assert that the joined text matches what the factory's `extract_text_content` produces | Recommended for any plugin keying on user text — protects you against future core refactors that sneak normalization between extraction and the items list. |
+| **5. XSS payload** | Feed a payload (`
`) into every transcript-derived field your render methods interpolate, render BOTH paths, and assert the raw tag is escaped (not present verbatim) | Write this whenever your `format_html` / `format_markdown` / `title` interpolates transcript data ([§4.2](#42-security-conscious-rendering)). String-level: assert `"
None:
assert "bold" in render_user_markdown("**bold**")
+def test_render_markdown_escapes_html() -> None:
+ """The shared (assistant/tool/web-authored) renderer must escape raw HTML.
+
+ Regression for the XSS where a transcript whose assistant text, tool
+ result, or fetched web content contained ``'
+ out = render_markdown(payload)
+ # No live markup reaches the DOM (escaped tags carry a dead
+ # ``onerror="`` but never a live ``onerror="`` on a real tag).
+ assert ")",
+ ):
+ out = render(src)
+ # The scheme is dangerous only as a live href/src attribute;
+ # appearing as visible link text is harmless.
+ assert 'href="javascript:' not in out, (render.__name__, src, out)
+ assert 'src="javascript:' not in out, (render.__name__, src, out)
+ assert 'href="data:' not in out, (render.__name__, src, out)
+ assert 'src="data:' not in out, (render.__name__, src, out)
+
+
+def test_assistant_text_does_not_inject_live_html() -> None:
+ """End-to-end: assistant text with HTML must not emit live tags."""
+ from claude_code_log.html.assistant_formatters import (
+ format_assistant_text_content,
+ )
+ from claude_code_log.models import (
+ AssistantTextMessage,
+ MessageMeta,
+ TextContent,
+ )
+
+ meta = MessageMeta(
+ session_id="test-session",
+ timestamp="2024-01-01T00:00:00Z",
+ uuid="test-uuid",
+ )
+ payload = "Sure! I'll enter into the field."
+ content = AssistantTextMessage(meta, items=[TextContent(type="text", text=payload)])
+ out = format_assistant_text_content(content)
+ assert " into the
+field"). See ``html/utils.py::_get_markdown_renderer`` for the escape policy.
+
+It is ALSO placed in a TITLE field — a generic tool_use's ``name`` (#245):
+the header title path (``{{ message_title | safe }}``) has no central
+escaping, so a tool with no specialized title method renders its raw name
+live in the header span. That channel is blind to the body-render escape and
+needs the per-field ``escape_html`` on the HTML title methods.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from playwright.sync_api import Page
+
+from claude_code_log.converter import load_transcript
+from claude_code_log.html.renderer import generate_html
+
+# Auto-firing vectors: "
+)
+
+_BASE = {
+ "cwd": "/app",
+ "isSidechain": False,
+ "sessionId": "11110000-0000-4000-8000-000000000001",
+ "userType": "external",
+ "version": "1.0.0",
+}
+_USAGE = {"input_tokens": 1, "output_tokens": 1}
+
+
+def _write_transcript(path: Path) -> None:
+ rows = [
+ {
+ **_BASE,
+ "type": "user",
+ "uuid": "u1",
+ "parentUuid": None,
+ "timestamp": "2026-06-27T10:00:00.000Z",
+ "message": {
+ "role": "user",
+ "content": f"Write an E2E test that types this: {PAYLOAD}",
+ },
+ },
+ {
+ **_BASE,
+ "type": "assistant",
+ "uuid": "a1",
+ "parentUuid": "u1",
+ "requestId": "r1",
+ "timestamp": "2026-06-27T10:00:01.000Z",
+ "message": {
+ "role": "assistant",
+ "model": "claude",
+ "id": "m1",
+ "type": "message",
+ "stop_reason": "end_turn",
+ "stop_sequence": None,
+ "usage": _USAGE,
+ "content": [
+ {"type": "text", "text": f"Sure! I'll enter {PAYLOAD} there."},
+ {
+ "type": "tool_use",
+ "id": "t1",
+ "name": "Write",
+ "input": {
+ "file_path": "/app/t.spec.ts",
+ "content": f"await page.fill('#in', '{PAYLOAD}');",
+ },
+ },
+ ],
+ },
+ },
+ {
+ **_BASE,
+ "type": "user",
+ "uuid": "u2",
+ "parentUuid": "a1",
+ "timestamp": "2026-06-27T10:00:02.000Z",
+ "message": {
+ "role": "user",
+ "content": [
+ {
+ "type": "tool_result",
+ "tool_use_id": "t1",
+ "content": f"Wrote file containing {PAYLOAD}",
+ }
+ ],
+ },
+ },
+ {
+ # TITLE-path vector (#245): a tool with no specialized title method
+ # falls back to its raw name in the header span. The name IS the
+ # payload — auto-firing if the title isn't escaped.
+ **_BASE,
+ "type": "assistant",
+ "uuid": "a2",
+ "parentUuid": "u2",
+ "requestId": "r2",
+ "timestamp": "2026-06-27T10:00:03.000Z",
+ "message": {
+ "role": "assistant",
+ "model": "claude",
+ "id": "m2",
+ "type": "message",
+ "stop_reason": "end_turn",
+ "stop_sequence": None,
+ "usage": _USAGE,
+ "content": [
+ {
+ "type": "tool_use",
+ "id": "t2",
+ "name": PAYLOAD,
+ "input": {"q": "noop"},
+ }
+ ],
+ },
+ },
+ ]
+ path.write_text("\n".join(json.dumps(r) for r in rows) + "\n", encoding="utf-8")
+
+
+class TestTranscriptXss:
+ @pytest.mark.browser
+ def test_opening_transcript_fires_no_dialog(
+ self, page: Page, tmp_path: Path
+ ) -> None:
+ jsonl = tmp_path / "xss.jsonl"
+ _write_transcript(jsonl)
+ entries = load_transcript(jsonl, silent=True)
+ html_path = tmp_path / "xss.html"
+ html_path.write_text(generate_html(entries, "XSS"), encoding="utf-8")
+
+ dialogs: list[str] = []
+
+ def _on_dialog(dialog: object) -> None: # pragma: no cover - event cb
+ dialogs.append(getattr(dialog, "message", ""))
+ dialog.dismiss() # type: ignore[attr-defined]
+
+ page.on("dialog", _on_dialog)
+ page.goto(html_path.as_uri())
+ # Give any onerror/script a tick to fire.
+ page.wait_for_timeout(200)
+
+ assert dialogs == [], f"XSS executed — dialogs fired: {dialogs}"
+
+ # And the payload is still visible to the reader, as escaped text.
+ body_text = page.inner_text("body")
+ assert "script-xss" in body_text
+ assert "alert('img-xss')" in body_text
+
+ @pytest.mark.browser
+ def test_no_content_supplied_nodes_in_dom(self, page: Page, tmp_path: Path) -> None:
+ """The payload tags must not materialise as live DOM nodes."""
+ jsonl = tmp_path / "xss.jsonl"
+ _write_transcript(jsonl)
+ entries = load_transcript(jsonl, silent=True)
+ html_path = tmp_path / "xss.html"
+ html_path.write_text(generate_html(entries, "XSS"), encoding="utf-8")
+
+ page.goto(html_path.as_uri())
+
+ # No <img> whose src is the payload's bogus "x" was injected, and no
+ # content-supplied <b> leaked as a live element.
+ injected = page.evaluate(
+ "() => ({"
+ " imgs: document.querySelectorAll('img[src=\"x\"]').length,"
+ " bolds: Array.from(document.querySelectorAll('b'))"
+ " .filter(b => b.textContent === 'bold').length,"
+ "})"
+ )
+ assert injected == {"imgs": 0, "bolds": 0}, injected
diff --git a/test/test_xss_markdown_surfaces.py b/test/test_xss_markdown_surfaces.py
new file mode 100644
index 00000000..7f124b0e
--- /dev/null
+++ b/test/test_xss_markdown_surfaces.py
@@ -0,0 +1,128 @@
+"""XSS: every Markdown interpolation surface routes through one gate (#245).
+
+Round-4 follow-up: the per-message + page/project/session headings were gated
+last; this generalises the gate to `safe_markdown_inline` and routes the inline
+link-label / list surfaces through it too — so "neutralise raw HTML from every
+source" is a single structural property, not a per-site convention.
+
+Surfaces (markdown/renderer.py), each driven end-to-end here:
+- WebSearch result link title → format_WebSearchOutput
+- projects-index project heading + link → generate_projects_index
+- per-project session-link label → generate_projects_index (combined off)
+- expand-paths tree label → generate_projects_index (expand_paths)
+- (TOC label + headings are pinned in test_xss_titles.py)
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from claude_code_log.markdown.renderer import MarkdownRenderer, safe_markdown_inline
+from claude_code_log.models import (
+ MessageMeta,
+ SystemMessage,
+ WebSearchLink,
+ WebSearchOutput,
+)
+from claude_code_log.renderer import TemplateMessage
+
+PAYLOAD = "
"
+
+
+def _no_raw_tag(md: str) -> None:
+ assert "
`` marker), so there is no
+central escaping. Each ``title_*`` method that interpolates a transcript field
+must therefore escape that field on the HTML path. daaain's PR secured the
+message *body* (the shared Markdown renderer → ``escape=True``) but left four
+title sinks unescaped; these pin them.
+
+The four sinks (all on the shared base ``Renderer``; the HTML renderer escapes
+on its side only, so the Markdown renderer doesn't get HTML-entity-escaped
+titles):
+
+1. generic / mcp__* / custom tool name — ``title_ToolUseMessage`` fallback
+2. hook name — ``title_HookAttachmentMessage``
+3. workflow phase title — ``title_WorkflowPhaseMessage``
+4. workflow agent label — ``title_WorkflowAgentMessage``
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from claude_code_log.converter import load_transcript
+from claude_code_log.html.renderer import HtmlRenderer, generate_html
+from claude_code_log.markdown.renderer import MarkdownRenderer
+from claude_code_log.models import (
+ BashInput,
+ HookAttachmentMessage,
+ MessageMeta,
+ SystemMessage,
+ ToolUseMessage,
+ WorkflowAgentMessage,
+ WorkflowPhaseMessage,
+)
+from claude_code_log.renderer import TemplateMessage
+
+PAYLOAD = "<img src=x onerror=alert(1)>"
+ESCAPED = "&lt;img src=x onerror=alert(1)&gt;"
+
+
+def _meta() -> MessageMeta:
+ return MessageMeta(uuid="u", session_id="s", timestamp="2025-01-01T00:00:00Z")
+
+
+def _title(content) -> str:
+ msg = TemplateMessage(content)
+ return HtmlRenderer().title_content(msg)
+
+
+class TestTitlePathEscaping:
+ def test_generic_tool_name_escaped(self, tmp_path: Path):
+ # A tool with no specialized title method (generic / mcp__* / custom)
+ # falls back to its raw name in the header. Exercise the real render
+ # path with the payload AS the tool name.
+ rows = [
+ {
+ "type": "user",
+ "uuid": "u0",
+ "parentUuid": None,
+ "isSidechain": False,
+ "userType": "external",
+ "cwd": "/x",
+ "sessionId": "s1",
+ "version": "1.0",
+ "timestamp": "2025-01-01T00:00:00Z",
+ "message": {"role": "user", "content": "go"},
+ },
+ {
+ "type": "assistant",
+ "uuid": "a1",
+ "parentUuid": "u0",
+ "isSidechain": False,
+ "userType": "external",
+ "cwd": "/x",
+ "sessionId": "s1",
+ "version": "1.0",
+ "timestamp": "2025-01-01T00:00:01Z",
+ "requestId": "r1",
+ "message": {
+ "id": "m1",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude",
+ "stop_reason": "tool_use",
+ "stop_sequence": None,
+ "usage": {"input_tokens": 1, "output_tokens": 1},
+ "content": [
+ {"type": "tool_use", "id": "t1", "name": PAYLOAD, "input": {}}
+ ],
+ },
+ },
+ ]
+ f = tmp_path / "x.jsonl"
+ f.write_text("\n".join(json.dumps(r) for r in rows), encoding="utf-8")
+ html = generate_html(load_transcript(f, silent=True), "x")
+ assert PAYLOAD not in html
+ assert ESCAPED in html
+
+ def test_hook_name_escaped(self):
+ content = HookAttachmentMessage(meta=_meta(), kind="success", hook_name=PAYLOAD)
+ out = _title(content)
+ assert out.startswith("Hook · ")
+ assert PAYLOAD not in out
+ assert ESCAPED in out
+
+ def test_workflow_phase_title_escaped(self):
+ content = WorkflowPhaseMessage(meta=_meta(), title=PAYLOAD)
+ out = _title(content)
+ assert out.startswith("Phase: ")
+ assert PAYLOAD not in out
+ assert ESCAPED in out
+
+ def test_workflow_agent_label_escaped(self):
+ content = WorkflowAgentMessage(meta=_meta(), label=PAYLOAD)
+ out = _title(content)
+ assert out.startswith("Agent ")
+ assert PAYLOAD not in out
+ assert ESCAPED in out
+
+ def test_system_level_escaped(self):
+ # ``level`` is free-text from the transcript, not an enum. The title is
+ # ``System {level.title()}``; the title-casing folds the tag name's
+ # case (``<Img Src=X Onerror=Alert(1)>``) but the tag would still fire (HTML attrs are
+ # case-insensitive), so it must be escaped AFTER ``.title()``.
+ content = SystemMessage(meta=_meta(), level=PAYLOAD, text="x")
+ out = _title(content)
+ assert out.startswith("System ")
+ # No live tag at any case; the dangerous ``<`` is entity-escaped, and
+ # ``.title()`` didn't corrupt the entity (no ``&Lt;``).
+ assert "
`` for a
+ downstream viewer to execute (#245)."""
+
+ def test_generic_tool_name_protected_in_markdown_heading(self, tmp_path: Path):
+ rows = [
+ {
+ "type": "user",
+ "uuid": "u0",
+ "parentUuid": None,
+ "isSidechain": False,
+ "userType": "external",
+ "cwd": "/x",
+ "sessionId": "s1",
+ "version": "1.0",
+ "timestamp": "2025-01-01T00:00:00Z",
+ "message": {"role": "user", "content": "go"},
+ },
+ {
+ "type": "assistant",
+ "uuid": "a1",
+ "parentUuid": "u0",
+ "isSidechain": False,
+ "userType": "external",
+ "cwd": "/x",
+ "sessionId": "s1",
+ "version": "1.0",
+ "timestamp": "2025-01-01T00:00:01Z",
+ "requestId": "r1",
+ "message": {
+ "id": "m1",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude",
+ "stop_reason": "tool_use",
+ "stop_sequence": None,
+ "usage": {"input_tokens": 1, "output_tokens": 1},
+ "content": [
+ {"type": "tool_use", "id": "t1", "name": PAYLOAD, "input": {}}
+ ],
+ },
+ },
+ ]
+ f = tmp_path / "x.jsonl"
+ f.write_text("\n".join(json.dumps(r) for r in rows), encoding="utf-8")
+ md = MarkdownRenderer().generate(load_transcript(f, silent=True), "x")
+ # The raw tag must not survive into the heading…
+ assert f"# {PAYLOAD}" not in md
+ assert "![]()