diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index c34a4bca..6e7df37f 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -943,7 +943,15 @@ def title_ToolUseMessage( content.input, AskUserQuestionInput ) and self._paired_answer_supersedes(message): return "" - return super().title_ToolUseMessage(content, message) + # Specialized tools dispatch to a title_*Input method that escapes via + # ``_tool_title``. Tools with NO specialized method (generic / mcp__* / + # ToolSearch / custom) fall back to the raw tool name — which is + # attacker-controllable and lands live in the header span. Escape it + # here rather than in the shared base ``title_ToolUseMessage`` (the + # Markdown renderer must not get HTML-entity-escaped titles). #245 XSS. + if title := self._dispatch_title(content.input, message): + return title + return escape_html(content.tool_name) def title_ToolResultMessage( self, content: ToolResultMessage, message: TemplateMessage @@ -964,6 +972,39 @@ def title_ToolResultMessage( return f"{base} {marker}" if base else marker return base + # Title overrides that escape their transcript-derived field for the HTML + # header span. These titles are built on the shared base ``Renderer`` (also + # used by the Markdown renderer, which must NOT receive HTML-entity-escaped + # titles), so the escaping lives on the HTML path only — mirroring how + # ``_tool_title`` escapes the tool name for specialized tools. #245 XSS. + + def title_HookAttachmentMessage( + self, content: HookAttachmentMessage, _: TemplateMessage + ) -> str: + # ``hook_name`` (e.g. "PostToolUse:TaskUpdate") is transcript-derived + # and lands in the header; escape it. 
+ label = content.hook_name or content.hook_event or content.kind + return f"Hook · {escape_html(label)}" + + def title_WorkflowPhaseMessage( + self, content: WorkflowPhaseMessage, _: TemplateMessage + ) -> str: + return f"Phase: {escape_html(content.title)}" if content.title else "Phase" + + def title_WorkflowAgentMessage( + self, content: WorkflowAgentMessage, _: TemplateMessage + ) -> str: + return f"Agent {escape_html(content.label)}" if content.label else "Agent" + + def title_SystemMessage(self, content: SystemMessage, _: TemplateMessage) -> str: + # ``level`` is FREE-TEXT from the transcript (``system_factory``: + # ``transcript.level or "info"``), not an enum — so it can carry a + # payload that lands in the header. Title-case the RAW level FIRST, + # then escape: escaping first would let ``.title()`` capitalize the + # entity prefixes (``&lt;`` → ``&Lt;``) and break the escaping. #245 XSS. + level = content.level or "unknown" + return f"System {escape_html(level.title())}" + def title_TaskInput(self, input: TaskInput, message: TemplateMessage) -> str: """Title → '🔧 Task (subagent_type) [async #]'. diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py index 632fa2c6..1575c75c 100644 --- a/claude_code_log/html/utils.py +++ b/claude_code_log/html/utils.py @@ -424,7 +424,24 @@ def block_code(code: str, info: Optional[str] = None) -> str: @functools.lru_cache(maxsize=1) def _get_markdown_renderer() -> mistune.Markdown: - """Get cached Mistune markdown renderer with Pygments syntax highlighting.""" + """Get cached Mistune markdown renderer with Pygments syntax highlighting. + + Uses ``escape=True`` so raw HTML embedded in the source text + (```` into the field" — + so rendering it unescaped lets that payload execute when the transcript + HTML is opened. The Markdown output path already neutralises raw HTML + from every source (see ``markdown/renderer.py::_protect_html_tags``); + the HTML path must match. 
``escape=True`` does not affect Markdown + formatting, plugin output (Pygments, SHA links), or code fences — only + raw HTML tags in the body. + """ from ..markdown_plugins import make_codespan_sha_plugin, make_sha_plugin from ..git_remote import resolve_sha_for_current_render @@ -447,7 +464,7 @@ def _get_markdown_renderer() -> mistune.Markdown: # mistune's built-in rule consumes the backticks. make_codespan_sha_plugin(resolve_sha_for_current_render), ], - escape=False, # Don't escape HTML since we want to render markdown properly + escape=True, # Escape raw HTML: transcript content is untrusted (XSS) hard_wrap=True, # Line break for newlines (checklists in Assistant messages) ) @@ -490,11 +507,13 @@ def render_markdown_inline(text: str) -> str: def _get_user_markdown_renderer() -> mistune.Markdown: """Markdown renderer for user-authored text. - Differs from the shared renderer in one critical way: ``escape=True`` - so a user typing raw ``` into +the field"* lands that payload in assistant prose, a tool result, and a +Write tool's file content. If it reaches the HTML unescaped it executes when +the file is opened. There is no "trusted" source here. + +Two safe paths, depending on what you emit: + +- **Building HTML with f-strings/format** → run every interpolated value + through `escape_html()` first (as the input formatter above does with + `escaped_query`). Never interpolate a raw field into markup. +- **Rendering markdown** → use `render_markdown` / `render_markdown_collapsible`. + Both use mistune with `escape=True`, so raw HTML tags in the body are + escaped to entities and unsafe link/image schemes (`javascript:`, `data:`) + are neutralised, while Markdown, code fences and Pygments still render. + +Regression coverage lives in `test/test_markdown_rendering.py` (unit) and +`test/test_xss_browser.py` (empirical: opens the file in a real browser and +asserts no `alert()` dialog fires). Add a payload-bearing case for any new +field you render. 
+ ### Update Exports Add functions to `__all__`: diff --git a/dev-docs/plugins.md b/dev-docs/plugins.md index f8136750..eb492008 100644 --- a/dev-docs/plugins.md +++ b/dev-docs/plugins.md @@ -212,6 +212,7 @@ renderer's dispatcher consults them after the renderer's own from dataclasses import dataclass from typing import ClassVar, Optional from claude_code_log.models import DetailLevel, ToolUseMessage +from claude_code_log.plugins import escape_html, safe_markdown_inline @dataclass class MyToolMessage(ToolUseMessage): @@ -220,23 +221,34 @@ class MyToolMessage(ToolUseMessage): detail_visibility: ClassVar[DetailLevel] = DetailLevel.LOW def format_markdown(self, _renderer, _message) -> str: + # ``action`` is transcript-derived (untrusted — see §4.2). Interpolated + # into Markdown SOURCE, so neutralise raw HTML with safe_markdown_inline + # (the Markdown-output path emits this verbatim). action = (self.input.input or {}).get("action", "?") - return f"_(my plugin) action={action}_" + return f"_(my plugin) action={safe_markdown_inline(action)}_" def format_html(self, _renderer, _message) -> Optional[str]: return None # fall back to mistune(format_markdown) def title(self, _renderer, _message) -> Optional[str]: - return "✉ my plugin" + # A title() return goes to ``{{ message_title | safe }}`` with NO core + # HTML escaping — escape transcript-derived interpolation yourself + # (§4.2). The Markdown heading path is auto-gated by the core. + action = (self.input.input or {}).get("action", "?") + return f"✉ my plugin: {escape_html(action)}" ``` +> The constant-string forms (`"✉ my plugin"`, `"_(my plugin)_"`) need no +> escaping — only **transcript-derived interpolation** does. See +> [§4.2](#42-security-conscious-rendering) for the full contract. + Signature contract for each method: | Method | Signature | Return | Notes | |---|---|---|---| -| `format_markdown` | `(self, renderer, message) -> str` | Markdown source string. 
| Define this whenever your class produces meaningful Markdown. Drives both Markdown output AND HTML output (via mistune) unless `format_html` is also defined. | -| `format_html` | `(self, renderer, message) -> str` | Raw HTML string (real string — no None sentinel). | Define this ONLY when you need HTML different from mistune-of-`format_markdown`. The dispatcher synthesizes that fallback automatically when `format_html` is absent. | -| `title` | `(self, renderer, message) -> Optional[str]` | Heading text or `None`. | Return `None` for "headless" (inline) messages. Return `""` (empty string, not None) to suppress the heading explicitly — the dispatcher distinguishes the two. | +| `format_markdown` | `(self, renderer, message) -> str` | Markdown source string. | Define this whenever your class produces meaningful Markdown. Drives both Markdown output AND HTML output (via mistune) unless `format_html` is also defined. **Escape transcript-derived interpolation** ([§4.2](#42-security-conscious-rendering)): the Markdown-output path emits this verbatim. | +| `format_html` | `(self, renderer, message) -> str` | Raw HTML string (real string — no None sentinel). | Define this ONLY when you need HTML different from mistune-of-`format_markdown`. The dispatcher synthesizes that fallback automatically when `format_html` is absent. **You own escaping** — the return is injected as live DOM; `escape_html` every transcript-derived interpolation ([§4.2](#42-security-conscious-rendering)). | +| `title` | `(self, renderer, message) -> Optional[str]` | Heading text or `None`. | Return `None` for "headless" (inline) messages. Return `""` (empty string, not None) to suppress the heading explicitly — the dispatcher distinguishes the two. **`escape_html` transcript-derived interpolation** — the title is emitted via `\| safe` with no core escaping ([§4.2](#42-security-conscious-rendering)). 
| **`format_html` is opt-in.** If your plugin class defines only `format_markdown`, the HtmlRenderer dispatcher automatically @@ -271,22 +283,27 @@ moves to the next ancestor. ### 4.1 Plugin-facing helpers -Two helpers are re-exported from `claude_code_log.plugins` for use -in `format_html` / `format_markdown` methods. The re-export is the -stable plugin API; the underlying implementation in -`claude_code_log/html/utils.py` may move or be renamed. +Four helpers are re-exported from `claude_code_log.plugins` for use +in `format_html` / `format_markdown` / `title` methods. The re-export is +the stable plugin API; the underlying implementation (in +`claude_code_log/html/utils.py` and `claude_code_log/markdown/renderer.py`) +may move or be renamed. ```python from claude_code_log.plugins import ( render_markdown, render_markdown_collapsible, + escape_html, + safe_markdown_inline, ) ``` | Helper | Signature | Use when | |---|---|---| -| `render_markdown(text)` | `(str) -> str` | You need Markdown→HTML inside a custom `format_html` (e.g. embedding a Markdown fragment in a richer HTML scaffold). | -| `render_markdown_collapsible(raw_content, css_class, *, line_threshold=20, preview_line_count=5)` | `(str, str, int, int) -> str` | Long Markdown bodies (mail bodies, agent responses, multi-paragraph result text). Returns inline `
` for short content, a collapsible `
` with preview + full body for content exceeding `line_threshold`. | +| `render_markdown(text)` | `(str) -> str` | You need Markdown→HTML inside a custom `format_html` (e.g. embedding a Markdown fragment in a richer HTML scaffold). Escapes raw HTML in `text` (untrusted-safe). | +| `render_markdown_collapsible(raw_content, css_class, *, line_threshold=20, preview_line_count=5)` | `(str, str, int, int) -> str` | Long Markdown bodies (mail bodies, agent responses, multi-paragraph result text). Returns inline `
` for short content, a collapsible `
` with preview + full body for content exceeding `line_threshold`. Escapes raw HTML. | +| `escape_html(text)` | `(str) -> str` | Interpolating transcript-derived text into a `format_html` raw-HTML string, OR into a `title` return (which is emitted via `\| safe`). Entity-escapes `<`, `>`, `&`, quotes. | +| `safe_markdown_inline(text)` | `(str) -> str` | Interpolating transcript-derived text into a Markdown **inline** fragment in `format_markdown` (a link label, a list item, inline prose) — the Markdown-output path emits `format_markdown` verbatim. Entity-escapes raw HTML tags while preserving Markdown formatting. | The reference plugin's [`tool_communicate_result.py`](../test/_plugins/clmail/src/claude_code_log_clmail_test/transformers/tool_communicate_result.py) @@ -297,6 +314,59 @@ Add to `claude_code_log.plugins.__all__` only on concrete plugin-author demand — every entry is an API commitment. Open an issue if a helper you need isn't exposed. +### 4.2 Security-conscious rendering + +**Every transcript-derived value is untrusted.** A transcript is not a +trusted document: the assistant routinely echoes arbitrary user / file / +web input verbatim ("write a test that types `<script>alert(1)</script>` +into the field"), and tool names, hook names, `cwd`-derived project names, +session summaries and the like are all attacker-influenceable. There is no +"trusted source" — if a value came from the transcript, escape it before it +reaches a rendered position. A plugin that interpolates transcript data into +its render methods reproduces the exact XSS sink class that issue #245 closed +in the core. 
+ +**Helper per context** — pick by *where the value lands*, covering **both** +output paths: + +| Context (where the value lands) | Helper | +|---|---| +| `format_html` raw-HTML f-string (text or attribute) | `escape_html(value)` | +| `format_html` embedding a Markdown body | `render_markdown(value)` / `render_markdown_collapsible(value, …)` | +| `format_markdown` inline fragment (link label, list item, inline prose) | `safe_markdown_inline(value)` | +| `title()` return | `escape_html(value)` — the HTML header emits it via `\| safe` (no core escaping); the Markdown heading is auto-gated by the core | + +Notes: + +- **Don't double-escape.** Pass a value through *one* helper for its context. + Don't `escape_html` a value and then also feed it to `render_markdown` + (you'd get visible `&lt;` entities). A constant string needs no helper. +- **`format_markdown` HTML safety is automatic; Markdown safety is not.** + When only `format_markdown` is defined, the HTML path runs it through + `render_markdown` (escapes raw HTML for you). The Markdown-output path emits + `format_markdown` verbatim, so inline transcript interpolation there needs + `safe_markdown_inline`. + +**Sink-class self-audit checklist** — scan your render methods for: + +- a raw f-string interpolating transcript data into HTML (`format_html`) +- transcript data interpolated into a `title()` return +- transcript data in a Markdown heading or inline link/list label + (`format_markdown`) +- anything you emit into a `\| safe` / non-autoescaped context + +**Prefer one structural gate over per-site escaping.** The #245 class took +several review rounds precisely because each sink was individually "known" but +no single chokepoint enforced neutralisation, so new sinks kept slipping in. +The core now routes every Markdown heading/label through one +`safe_markdown_inline` gate. 
Apply the same principle in your plugin: if you +build several rendered strings from transcript data, funnel them through one +helper at one place rather than remembering to escape at each call site. + +For the core-renderer view of this contract (the per-field escaping the host +renderer applies, and the title/markdown gate internals), see +[implementing-a-tool-renderer.md](implementing-a-tool-renderer.md). + --- ## 5. Dispatch resolution order @@ -573,6 +643,7 @@ own plugin should follow the same shape: | **2. Dispatch matrix** | Renderer-side vs class-side resolution; HTML vs Markdown output | Skip unless your plugin does something exotic with the dispatcher. | | **3. Transformer integration** | End-to-end: real `MessageContent` through your `transform()` and class-side render methods | Always write this. Drive your transformer with hand-built `MessageMeta.empty()` candidates; assert the replacement is an instance of your subclass and that the render methods return the expected text. | | **4. Text-equivalence** | If your plugin reads `UserTextMessage.items`, assert that the joined text matches what the factory's `extract_text_content` produces | Recommended for any plugin keying on user text — protects you against future core refactors that sneak normalization between extraction and the items list. | +| **5. XSS payload** | Feed a payload (``) into every transcript-derived field your render methods interpolate, render BOTH paths, and assert the raw tag is escaped (not present verbatim) | Write this whenever your `format_html` / `format_markdown` / `title` interpolates transcript data ([§4.2](#42-security-conscious-rendering)). String-level: assert `" None: assert "bold" in render_user_markdown("**bold**") +def test_render_markdown_escapes_html() -> None: + """The shared (assistant/tool/web-authored) renderer must escape raw HTML. 
+ + Regression for the XSS where a transcript whose assistant text, tool + result, or fetched web content contained ``' + out = render_markdown(payload) + # No live markup reaches the DOM (escaped tags carry a dead + # ``onerror="`` but never a live ``onerror="`` on a real tag). + assert ")", + ): + out = render(src) + # The scheme is dangerous only as a live href/src attribute; + # appearing as visible link text is harmless. + assert 'href="javascript:' not in out, (render.__name__, src, out) + assert 'src="javascript:' not in out, (render.__name__, src, out) + assert 'href="data:' not in out, (render.__name__, src, out) + assert 'src="data:' not in out, (render.__name__, src, out) + + +def test_assistant_text_does_not_inject_live_html() -> None: + """End-to-end: assistant text with HTML must not emit live tags.""" + from claude_code_log.html.assistant_formatters import ( + format_assistant_text_content, + ) + from claude_code_log.models import ( + AssistantTextMessage, + MessageMeta, + TextContent, + ) + + meta = MessageMeta( + session_id="test-session", + timestamp="2024-01-01T00:00:00Z", + uuid="test-uuid", + ) + payload = "Sure! I'll enter into the field." + content = AssistantTextMessage(meta, items=[TextContent(type="text", text=payload)]) + out = format_assistant_text_content(content) + assert " into the +field"). See ``html/utils.py::_get_markdown_renderer`` for the escape policy. + +It is ALSO placed in a TITLE field — a generic tool_use's ``name`` (#245): +the header title path (``{{ message_title | safe }}``) has no central +escaping, so a tool with no specialized title method renders its raw name +live in the header span. That channel is blind to the body-render escape and +needs the per-field ``escape_html`` on the HTML title methods. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from playwright.sync_api import Page + +from claude_code_log.converter import load_transcript +from claude_code_log.html.renderer import generate_html + +# Auto-firing vectors: " +) + +_BASE = { + "cwd": "/app", + "isSidechain": False, + "sessionId": "11110000-0000-4000-8000-000000000001", + "userType": "external", + "version": "1.0.0", +} +_USAGE = {"input_tokens": 1, "output_tokens": 1} + + +def _write_transcript(path: Path) -> None: + rows = [ + { + **_BASE, + "type": "user", + "uuid": "u1", + "parentUuid": None, + "timestamp": "2026-06-27T10:00:00.000Z", + "message": { + "role": "user", + "content": f"Write an E2E test that types this: {PAYLOAD}", + }, + }, + { + **_BASE, + "type": "assistant", + "uuid": "a1", + "parentUuid": "u1", + "requestId": "r1", + "timestamp": "2026-06-27T10:00:01.000Z", + "message": { + "role": "assistant", + "model": "claude", + "id": "m1", + "type": "message", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": _USAGE, + "content": [ + {"type": "text", "text": f"Sure! I'll enter {PAYLOAD} there."}, + { + "type": "tool_use", + "id": "t1", + "name": "Write", + "input": { + "file_path": "/app/t.spec.ts", + "content": f"await page.fill('#in', '{PAYLOAD}');", + }, + }, + ], + }, + }, + { + **_BASE, + "type": "user", + "uuid": "u2", + "parentUuid": "a1", + "timestamp": "2026-06-27T10:00:02.000Z", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "t1", + "content": f"Wrote file containing {PAYLOAD}", + } + ], + }, + }, + { + # TITLE-path vector (#245): a tool with no specialized title method + # falls back to its raw name in the header span. The name IS the + # payload — auto-firing if the title isn't escaped. 
+ **_BASE, + "type": "assistant", + "uuid": "a2", + "parentUuid": "u2", + "requestId": "r2", + "timestamp": "2026-06-27T10:00:03.000Z", + "message": { + "role": "assistant", + "model": "claude", + "id": "m2", + "type": "message", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": _USAGE, + "content": [ + { + "type": "tool_use", + "id": "t2", + "name": PAYLOAD, + "input": {"q": "noop"}, + } + ], + }, + }, + ] + path.write_text("\n".join(json.dumps(r) for r in rows) + "\n", encoding="utf-8") + + +class TestTranscriptXss: + @pytest.mark.browser + def test_opening_transcript_fires_no_dialog( + self, page: Page, tmp_path: Path + ) -> None: + jsonl = tmp_path / "xss.jsonl" + _write_transcript(jsonl) + entries = load_transcript(jsonl, silent=True) + html_path = tmp_path / "xss.html" + html_path.write_text(generate_html(entries, "XSS"), encoding="utf-8") + + dialogs: list[str] = [] + + def _on_dialog(dialog: object) -> None: # pragma: no cover - event cb + dialogs.append(getattr(dialog, "message", "")) + dialog.dismiss() # type: ignore[attr-defined] + + page.on("dialog", _on_dialog) + page.goto(html_path.as_uri()) + # Give any onerror/script a tick to fire. + page.wait_for_timeout(200) + + assert dialogs == [], f"XSS executed — dialogs fired: {dialogs}" + + # And the payload is still visible to the reader, as escaped text. 
+ body_text = page.inner_text("body") + assert "script-xss" in body_text + assert "alert('img-xss')" in body_text + + @pytest.mark.browser + def test_no_content_supplied_nodes_in_dom(self, page: Page, tmp_path: Path) -> None: + """The payload tags must not materialise as live DOM nodes.""" + jsonl = tmp_path / "xss.jsonl" + _write_transcript(jsonl) + entries = load_transcript(jsonl, silent=True) + html_path = tmp_path / "xss.html" + html_path.write_text(generate_html(entries, "XSS"), encoding="utf-8") + + page.goto(html_path.as_uri()) + + # No whose src is the payload's bogus "x" was injected, and no + # content-supplied leaked as a live element. + injected = page.evaluate( + "() => ({" + " imgs: document.querySelectorAll('img[src=\"x\"]').length," + " bolds: Array.from(document.querySelectorAll('b'))" + " .filter(b => b.textContent === 'bold').length," + "})" + ) + assert injected == {"imgs": 0, "bolds": 0}, injected diff --git a/test/test_xss_markdown_surfaces.py b/test/test_xss_markdown_surfaces.py new file mode 100644 index 00000000..7f124b0e --- /dev/null +++ b/test/test_xss_markdown_surfaces.py @@ -0,0 +1,128 @@ +"""XSS: every Markdown interpolation surface routes through one gate (#245). + +Round-4 follow-up: the per-message + page/project/session headings were gated +last; this generalises the gate to `safe_markdown_inline` and routes the inline +link-label / list surfaces through it too — so "neutralise raw HTML from every +source" is a single structural property, not a per-site convention. 
+ +Surfaces (markdown/renderer.py), each driven end-to-end here: +- WebSearch result link title → format_WebSearchOutput +- projects-index project heading + link → generate_projects_index +- per-project session-link label → generate_projects_index (combined off) +- expand-paths tree label → generate_projects_index (expand_paths) +- (TOC label + headings are pinned in test_xss_titles.py) +""" + +from __future__ import annotations + +from typing import Any + +from claude_code_log.markdown.renderer import MarkdownRenderer, safe_markdown_inline +from claude_code_log.models import ( + MessageMeta, + SystemMessage, + WebSearchLink, + WebSearchOutput, +) +from claude_code_log.renderer import TemplateMessage + +PAYLOAD = "" + + +def _no_raw_tag(md: str) -> None: + assert "`` marker), so there is no +central escaping. Each ``title_*`` method that interpolates a transcript field +must therefore escape that field on the HTML path. daaain's PR secured the +message *body* (the shared Markdown renderer → ``escape=True``) but left four +title sinks unescaped; these pin them. + +The four sinks (all on the shared base ``Renderer``; the HTML renderer escapes +on its side only, so the Markdown renderer doesn't get HTML-entity-escaped +titles): + +1. generic / mcp__* / custom tool name — ``title_ToolUseMessage`` fallback +2. hook name — ``title_HookAttachmentMessage`` +3. workflow phase title — ``title_WorkflowPhaseMessage`` +4. 
workflow agent label — ``title_WorkflowAgentMessage`` +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from claude_code_log.converter import load_transcript +from claude_code_log.html.renderer import HtmlRenderer, generate_html +from claude_code_log.markdown.renderer import MarkdownRenderer +from claude_code_log.models import ( + BashInput, + HookAttachmentMessage, + MessageMeta, + SystemMessage, + ToolUseMessage, + WorkflowAgentMessage, + WorkflowPhaseMessage, +) +from claude_code_log.renderer import TemplateMessage + +PAYLOAD = "" +ESCAPED = "<img src=x onerror=alert(1)>" + + +def _meta() -> MessageMeta: + return MessageMeta(uuid="u", session_id="s", timestamp="2025-01-01T00:00:00Z") + + +def _title(content) -> str: + msg = TemplateMessage(content) + return HtmlRenderer().title_content(msg) + + +class TestTitlePathEscaping: + def test_generic_tool_name_escaped(self, tmp_path: Path): + # A tool with no specialized title method (generic / mcp__* / custom) + # falls back to its raw name in the header. Exercise the real render + # path with the payload AS the tool name. 
+ rows = [ + { + "type": "user", + "uuid": "u0", + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": "/x", + "sessionId": "s1", + "version": "1.0", + "timestamp": "2025-01-01T00:00:00Z", + "message": {"role": "user", "content": "go"}, + }, + { + "type": "assistant", + "uuid": "a1", + "parentUuid": "u0", + "isSidechain": False, + "userType": "external", + "cwd": "/x", + "sessionId": "s1", + "version": "1.0", + "timestamp": "2025-01-01T00:00:01Z", + "requestId": "r1", + "message": { + "id": "m1", + "type": "message", + "role": "assistant", + "model": "claude", + "stop_reason": "tool_use", + "stop_sequence": None, + "usage": {"input_tokens": 1, "output_tokens": 1}, + "content": [ + {"type": "tool_use", "id": "t1", "name": PAYLOAD, "input": {}} + ], + }, + }, + ] + f = tmp_path / "x.jsonl" + f.write_text("\n".join(json.dumps(r) for r in rows), encoding="utf-8") + html = generate_html(load_transcript(f, silent=True), "x") + assert PAYLOAD not in html + assert ESCAPED in html + + def test_hook_name_escaped(self): + content = HookAttachmentMessage(meta=_meta(), kind="success", hook_name=PAYLOAD) + out = _title(content) + assert out.startswith("Hook · ") + assert PAYLOAD not in out + assert ESCAPED in out + + def test_workflow_phase_title_escaped(self): + content = WorkflowPhaseMessage(meta=_meta(), title=PAYLOAD) + out = _title(content) + assert out.startswith("Phase: ") + assert PAYLOAD not in out + assert ESCAPED in out + + def test_workflow_agent_label_escaped(self): + content = WorkflowAgentMessage(meta=_meta(), label=PAYLOAD) + out = _title(content) + assert out.startswith("Agent ") + assert PAYLOAD not in out + assert ESCAPED in out + + def test_system_level_escaped(self): + # ``level`` is free-text from the transcript, not an enum. The title is + # ``System {level.title()}``; the title-casing folds the tag name's + # case (````) but the tag would still fire (HTML attrs are + # case-insensitive), so it must be escaped AFTER ``.title()``. 
+ content = SystemMessage(meta=_meta(), level=PAYLOAD, text="x") + out = _title(content) + assert out.startswith("System ") + # No live tag at any case; the dangerous ``<`` is entity-escaped, and + # ``.title()`` didn't corrupt the entity (no ``≪``). + assert "`` for a + downstream viewer to execute (#245).""" + + def test_generic_tool_name_protected_in_markdown_heading(self, tmp_path: Path): + rows = [ + { + "type": "user", + "uuid": "u0", + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": "/x", + "sessionId": "s1", + "version": "1.0", + "timestamp": "2025-01-01T00:00:00Z", + "message": {"role": "user", "content": "go"}, + }, + { + "type": "assistant", + "uuid": "a1", + "parentUuid": "u0", + "isSidechain": False, + "userType": "external", + "cwd": "/x", + "sessionId": "s1", + "version": "1.0", + "timestamp": "2025-01-01T00:00:01Z", + "requestId": "r1", + "message": { + "id": "m1", + "type": "message", + "role": "assistant", + "model": "claude", + "stop_reason": "tool_use", + "stop_sequence": None, + "usage": {"input_tokens": 1, "output_tokens": 1}, + "content": [ + {"type": "tool_use", "id": "t1", "name": PAYLOAD, "input": {}} + ], + }, + }, + ] + f = tmp_path / "x.jsonl" + f.write_text("\n".join(json.dumps(r) for r in rows), encoding="utf-8") + md = MarkdownRenderer().generate(load_transcript(f, silent=True), "x") + # The raw tag must not survive into the heading… + assert f"# {PAYLOAD}" not in md + assert "