daaain · cboos · Jun 30, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py
@@ -943,7 +943,15 @@ def title_ToolUseMessage(
             content.input, AskUserQuestionInput
         ) and self._paired_answer_supersedes(message):
             return ""
-        return super().title_ToolUseMessage(content, message)
+        # Specialized tools dispatch to a title_*Input method that escapes via
+        # ``_tool_title``. Tools with NO specialized method (generic / mcp__* /
+        # ToolSearch / custom) fall back to the raw tool name — which is
+        # attacker-controllable and lands live in the header span. Escape it
+        # here rather than in the shared base ``title_ToolUseMessage`` (the
+        # Markdown renderer must not get HTML-entity-escaped titles). #245 XSS.
+        if title := self._dispatch_title(content.input, message):
+            return title
+        return escape_html(content.tool_name)
 
     def title_ToolResultMessage(
         self, content: ToolResultMessage, message: TemplateMessage
@@ -964,6 +972,39 @@ def title_ToolResultMessage(
             return f"{base} {marker}" if base else marker
         return base
 
+    # Title overrides that escape their transcript-derived field for the HTML
+    # header span. These titles are built on the shared base ``Renderer`` (also
+    # used by the Markdown renderer, which must NOT receive HTML-entity-escaped
+    # titles), so the escaping lives on the HTML path only — mirroring how
+    # ``_tool_title`` escapes the tool name for specialized tools. #245 XSS.
+
+    def title_HookAttachmentMessage(
+        self, content: HookAttachmentMessage, _: TemplateMessage
+    ) -> str:
+        """Build the ``Hook · <label>`` header title with the label HTML-escaped."""
+        # ``hook_name`` (e.g. "PostToolUse:TaskUpdate") is transcript-derived.
+        shown = escape_html(content.hook_name or content.hook_event or content.kind)
+        return f"Hook · {shown}"
+
+    def title_WorkflowPhaseMessage(
+        self, content: WorkflowPhaseMessage, _: TemplateMessage
+    ) -> str:
+        return "Phase" if not content.title else f"Phase: {escape_html(content.title)}"
+
+    def title_WorkflowAgentMessage(
+        self, content: WorkflowAgentMessage, _: TemplateMessage
+    ) -> str:
+        return "Agent" if not content.label else f"Agent {escape_html(content.label)}"
+
+    def title_SystemMessage(self, content: SystemMessage, _: TemplateMessage) -> str:
+        """Return ``System <Level>``: the level is title-cased, then escaped."""
+        # ``level`` is free text from the transcript (``system_factory`` uses
+        # ``transcript.level or "info"``), not an enum, so it can carry a payload
+        # into the header. Title-case the RAW text first: on escaped text
+        # ``.title()`` would capitalise entities (``&lt;`` → ``&Lt;``). #245 XSS.
+        shown_level = (content.level or "unknown").title()
+        return f"System {escape_html(shown_level)}"
+
     def title_TaskInput(self, input: TaskInput, message: TemplateMessage) -> str:
         """Title → '🔧 Task <desc> (subagent_type) [async #<id>]'.
 

diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py
@@ -424,7 +424,24 @@ def block_code(code: str, info: Optional[str] = None) -> str:
 
 @functools.lru_cache(maxsize=1)
 def _get_markdown_renderer() -> mistune.Markdown:
-    """Get cached Mistune markdown renderer with Pygments syntax highlighting."""
+    """Get cached Mistune markdown renderer with Pygments syntax highlighting.
+
+    Uses ``escape=True`` so raw HTML embedded in the source text
+    (``<script>``, ``<img onerror=…>``, bare ``<b>``, …) is rendered as
+    literal entity-escaped text rather than injected as live DOM.
+
+    This renderer handles assistant/tool/web-authored content (assistant
+    prose, Task/WebSearch/WebFetch results, plans, system messages,
+    teammate bodies). That content is **not** trusted: the assistant
+    routinely echoes arbitrary user/file/web input verbatim — e.g. "write
+    an E2E test that types ``<script>alert(1)</script>`` into the field" —
+    so rendering it unescaped lets that payload execute when the transcript
+    HTML is opened. The Markdown output path already neutralises raw HTML
+    from every source (see ``markdown/renderer.py::_protect_html_tags``);
+    the HTML path must match. ``escape=True`` does not affect Markdown
+    formatting, plugin output (Pygments, SHA links), or code fences — only
+    raw HTML tags in the body.
+    """
     from ..markdown_plugins import make_codespan_sha_plugin, make_sha_plugin
     from ..git_remote import resolve_sha_for_current_render
 
@@ -447,7 +464,7 @@ def _get_markdown_renderer() -> mistune.Markdown:
             # mistune's built-in rule consumes the backticks.
             make_codespan_sha_plugin(resolve_sha_for_current_render),
         ],
-        escape=False,  # Don't escape HTML since we want to render markdown properly
+        escape=True,  # Escape raw HTML: transcript content is untrusted (XSS)
         hard_wrap=True,  # Line break for newlines (checklists in Assistant messages)
     )
 
@@ -490,11 +507,13 @@ def render_markdown_inline(text: str) -> str:
 def _get_user_markdown_renderer() -> mistune.Markdown:
     """Markdown renderer for user-authored text.
 
-    Differs from the shared renderer in one critical way: ``escape=True``
-    so a user typing raw ``<script>`` or other HTML sees the literal
-    characters rendered as code, not injected into the DOM. Assistant
-    content uses ``escape=False`` deliberately (tool output renders
-    pre-formed HTML); user content must not bypass escaping.
+    Uses ``escape=True`` so raw ``<script>`` or other HTML in the source is
+    rendered as literal escaped text, not injected into the DOM. The shared
+    renderer (``_get_markdown_renderer``) was historically ``escape=False``
+    for assistant/tool output that emitted pre-formed HTML; it now also
+    escapes — transcript content is untrusted from every source (#245 XSS),
+    so both pipelines neutralise raw HTML. This one is retained for the
+    user-content call sites (``render_user_markdown``).
     """
     from ..markdown_plugins import make_codespan_sha_plugin, make_sha_plugin
     from ..git_remote import resolve_sha_for_current_render
@@ -652,8 +671,10 @@ def _markdown_collapsible(
     preview_line_count: int,
 ) -> str:
     """Shared body for the collapsible-markdown helpers, parameterized by the
-    markdown render function (escape=False for assistant/tool output vs
-    escape=True for untrusted content)."""
+    markdown render function. Both render functions escape raw HTML
+    (``escape=True``): transcript content is untrusted regardless of source —
+    assistant/tool output routinely echoes arbitrary user/file/web input — so
+    raw tags are neutralised rather than injected as live DOM (XSS)."""
     rendered_html = render_fn(raw_content)
 
     lines = raw_content.splitlines()
@@ -686,10 +707,11 @@ def render_markdown_collapsible(
     For long content, creates a collapsible details element with a preview.
     For short content, renders inline with the specified CSS class.
 
-    Uses the ``escape=False`` renderer — for assistant/tool-authored content
-    (Task results, WebSearch/WebFetch, plans) that may emit pre-formed HTML.
-    For untrusted content (e.g. memory files), use
-    ``render_user_markdown_collapsible`` instead.
+    Renders via the shared HTML-escaping renderer (``render_markdown``),
+    so raw HTML in assistant/tool/web-authored content (Task results,
+    WebSearch/WebFetch, plans) is neutralised — transcript content is
+    untrusted (the assistant echoes arbitrary input). Markdown formatting,
+    Pygments highlighting and code fences are unaffected.
 
     Args:
         raw_content: The raw text content to render as markdown

diff --git a/claude_code_log/markdown/renderer.py b/claude_code_log/markdown/renderer.py
@@ -273,6 +273,37 @@ def _protect_html_tags(text: str) -> str:
     return str(rendered).rstrip("\n")
 
 
+def safe_markdown_inline(text: str) -> str:
+    """Entity-escape raw HTML tags in an inline Markdown fragment (#245 XSS).
+
+    This is the one structural gate for EVERY Markdown surface that places
+    transcript-derived text where a downstream viewer would render markup:
+
+    * ``#``/``##`` headings — per-message titles plus the page / project /
+      session page headings;
+    * inline link labels and list items — the TOC label, WebSearch result
+      link titles, the project- and session-index link labels, and the
+      expand-paths tree labels.
+
+    Transcript-reachable sources (generic tool names, hook / workflow phase /
+    agent labels, system ``level``, session summaries, project display names
+    derived from ``cwd``, web result titles) could otherwise carry a raw
+    ``<img onerror=…>`` into the ``.md`` for a permissive viewer to execute.
+    Funnelling every surface through ONE helper gives a single place to
+    audit, instead of a per-site convention that drifts and gets forgotten.
+
+    Pass only the text FRAGMENT (label / title / heading text), never a
+    composed ``[label](url)`` — the caller preserves the destination.
+
+    A fragment with no literal ``<`` (the only char that can open a tag) is
+    returned byte-identical, since the mistune round-trip in
+    ``_protect_html_tags`` re-normalises markdown escaping (``\\*\\*`` →
+    ``\\**``). The HTML path differs: ``HtmlRenderer`` escapes per field
+    via ``escape_html``.
+    """
+    return text if "<" not in text else _protect_html_tags(text)
+
+
 def _render_expand_paths_tree(template_projects: list[Any]) -> list[str]:
     """Render `--expand-paths` Markdown index as a nested bullet-list
     directory tree.
@@ -347,7 +378,7 @@ def _emit(node: dict[str, Any], depth: int) -> None:
             _emit(node[name], depth + 1)
         for label, url, ts in node.get("_links", []):
             ts_suffix = f" — *{ts}*" if ts else ""
-            lines.append(f"{indent}- [{label}]({url}){ts_suffix}")
+            lines.append(f"{indent}- [{safe_markdown_inline(label)}]({url}){ts_suffix}")
 
     _emit(root, 0)
     return lines
@@ -1525,7 +1556,7 @@ def format_WebSearchOutput(
                 parts.append("---")
                 parts.append("")
             for link in output.links:
-                parts.append(f"- [{link.title}]({link.url})")
+                parts.append(f"- [{safe_markdown_inline(link.title)}]({link.url})")
         elif not output.summary:
             # Only show "no results" if there's also no summary
             parts.append("*No results found*")
@@ -1971,7 +2002,7 @@ def _generate_toc(self, session_nav: list[dict[str, Any]]) -> str:
                     if summary
                     else f"Session `{session_short}`"
                 )
-            lines.append(f"- [{label}](#{anchor})")
+            lines.append(f"- [{safe_markdown_inline(label)}](#{anchor})")
         lines.append("")
         return "\n".join(lines)
 
@@ -2049,7 +2080,9 @@ def _render_message(self, msg: TemplateMessage, level: int) -> str:
 
             if not suppress_heading:
                 heading_level = min(level, 6)  # Markdown max is h6
-                parts.append(f"{'#' * heading_level} {title}")
+                # Neutralise raw HTML in the title via the single heading gate
+                # (#245 XSS) — see ``safe_markdown_inline``.
+                parts.append(f"{'#' * heading_level} {safe_markdown_inline(title)}")
                 # Per-message timestamp line (issue #160). Skip for
                 # session headers (they have no meaningful per-msg time)
                 # and when the heading was suppressed by `compact` mode
@@ -2142,7 +2175,7 @@ def _generate_inner(
         }
 
         parts = [f"<!-- Generated by claude-code-log v{get_library_version()} -->", ""]
-        parts.append(f"# {title}")
+        parts.append(f"# {safe_markdown_inline(title)}")
 
         # Table of Contents
         if session_nav:
@@ -2225,7 +2258,7 @@ def generate_projects_index(
         template_projects, template_summary = prepare_projects_index(project_summaries)
 
         parts = [f"<!-- Generated by claude-code-log v{get_library_version()} -->", ""]
-        parts.append(f"# {title}")
+        parts.append(f"# {safe_markdown_inline(title)}")
 
         # Summary stats
         parts.append(
@@ -2252,11 +2285,15 @@ def generate_projects_index(
                 # `--combined no` mode: header is a plain heading (no
                 # link to the non-existent combined file); per-session
                 # bullets link directly to `session-{id}.md` files.
-                parts.append(f"## {project.display_name}")
+                parts.append(f"## {safe_markdown_inline(project.display_name)}")
             else:
-                # Derive markdown link from html_file path
+                # Derive markdown link from html_file path. Neutralise only the
+                # display_name fragment (transcript-reachable via cwd) so the
+                # link target is preserved.
                 md_link = project.html_file.replace(".html", ".md")
-                parts.append(f"## [{project.display_name}]({md_link})")
+                parts.append(
+                    f"## [{safe_markdown_inline(project.display_name)}]({md_link})"
+                )
             # Use actual session count (filtered) like HTML does
             session_count = (
                 len(project.sessions) if project.sessions else project.jsonl_count
@@ -2284,7 +2321,9 @@ def generate_projects_index(
                     timestamp_suffix = (
                         f" — *{timestamp_range}*" if timestamp_range else ""
                     )
-                    parts.append(f"- [{label}]({file_link}){timestamp_suffix}")
+                    parts.append(
+                        f"- [{safe_markdown_inline(label)}]({file_link}){timestamp_suffix}"
+                    )
             parts.append("")
 
         return "\n".join(parts)

diff --git a/claude_code_log/plugins.py b/claude_code_log/plugins.py
@@ -39,7 +39,12 @@
     # Visible to static type-checkers (pyright/mypy) and to
     # ``__all__`` validation; resolved at runtime via the
     # PEP-562 ``__getattr__`` further down.
-    from .html.utils import render_markdown, render_markdown_collapsible
+    from .html.utils import (
+        escape_html,
+        render_markdown,
+        render_markdown_collapsible,
+    )
+    from .markdown.renderer import safe_markdown_inline
 
 from .models import MessageContent, MessageMeta
 
@@ -343,29 +348,45 @@ def apply_transformers(
 # documented signatures.
 
 _PUBLIC_HELPERS: frozenset[str] = frozenset(
-    {"render_markdown", "render_markdown_collapsible"}
+    {
+        "render_markdown",
+        "render_markdown_collapsible",
+        # Security helpers (#245): a plugin's ``format_html`` / ``title`` may
+        # interpolate transcript-derived (untrusted) data; without a surfaced
+        # primitive the author reproduces the title/markdown XSS sinks. See
+        # ``dev-docs/plugins.md`` §4 "Security-conscious rendering".
+        "escape_html",
+        "safe_markdown_inline",
+    }
 )
 
 
 def __getattr__(name: str) -> Any:  # PEP 562
     if name in _PUBLIC_HELPERS:
-        from .html.utils import (
-            render_markdown as _rm,
-            render_markdown_collapsible as _rmc,
-        )
+        # Resolved lazily to keep package init acyclic; the result is cached
+        # in ``globals()`` below. ``safe_markdown_inline`` (the markdown
+        # renderer's inline gate: entity-escapes raw HTML tags in an inline
+        # markdown fragment, preserving markdown) lives in
+        # ``markdown/renderer.py``; the others in ``html/utils.py``.
+        if name == "safe_markdown_inline":
+            from .markdown.renderer import safe_markdown_inline as resolved
+        else:
+            from .html import utils as _utils
 
-        globals()["render_markdown"] = _rm
-        globals()["render_markdown_collapsible"] = _rmc
-        return globals()[name]
+            resolved = getattr(_utils, name)
+        globals()[name] = resolved
+        return resolved
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
 __all__ = [
     "ENTRY_POINT_GROUP",
     "MessageTransformer",
     "apply_transformers",
+    "escape_html",
     "load_transformers",
     "render_markdown",
     "render_markdown_collapsible",
     "reset_cache",
+    "safe_markdown_inline",
 ]
diff --git a/dev-docs/implementing-a-tool-renderer.md b/dev-docs/implementing-a-tool-renderer.md
@@ -177,6 +177,30 @@ def format_websearch_output(output: WebSearchOutput) -> str:
     return render_markdown_collapsible(markdown_content, "websearch-results")
 ```
 
+### Escaping: all transcript content is untrusted
+
+Treat every value that comes out of a transcript as attacker-controlled.
+The assistant routinely echoes arbitrary user/file/web input verbatim — a
+prompt like *"write an E2E test that types `<script>alert(1)</script>` into
+the field"* lands that payload in assistant prose, a tool result, and a
+Write tool's file content. If it reaches the HTML unescaped it executes when
+the file is opened. There is no "trusted" source here.
+
+Two safe paths, depending on what you emit:
+
+- **Building HTML with f-strings/format** → run every interpolated value
+  through `escape_html()` first (as the input formatter above does with
+  `escaped_query`). Never interpolate a raw field into markup.
+- **Rendering markdown** → use `render_markdown` / `render_markdown_collapsible`.
+  Both use mistune with `escape=True`, so raw HTML tags in the body are
+  escaped to entities; unsafe link/image schemes (`javascript:`, `data:`)
+  are neutralised too, while Markdown, code fences and Pygments still render.
+
+Regression coverage lives in `test/test_markdown_rendering.py` (unit) and
+`test/test_xss_browser.py` (empirical: opens the file in a real browser and
+asserts no `alert()` dialog fires). Add a payload-bearing case for any new
+field you render.
+
 ### Update Exports
 
 Add functions to `__all__`: