Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion claude_code_log/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,10 @@ def main(
)
return

# Out-param: convert_jsonl_to appends whether it actually (re)wrote
# the combined output, so we don't print "Successfully converted" on
# top of its own "is current, skipping regeneration" line on a skip.
regenerated: list[bool] = []
output_path = convert_jsonl_to(
output_format,
input_path,
Expand Down Expand Up @@ -1326,8 +1330,19 @@ def main(
# same dir still regenerates), and `--all-projects` calls this
# with output=None anyway, so its skip is never forced.
force_regenerate=output is not None and _output_path_is_file(output),
regenerated=regenerated,
)
if input_path.is_file():
# On a pure skip the converter already printed its "... is current,
# skipping regeneration" line (single-file and directory paths alike),
# so suppress the redundant/misleading success line for BOTH. The
# `regenerated` out-param carries exactly one bool from
# convert_jsonl_to: True if the combined transcript OR any individual
# session file was (re)written (so `--combined no`, which writes only
# session files, still confirms its work), False when all is current.
was_regenerated = any(regenerated)
if not was_regenerated:
pass
elif input_path.is_file():
click.echo(f"Successfully converted {input_path} to {output_path}")
else:
jsonl_count = len(list(input_path.glob("*.jsonl")))
Expand Down
58 changes: 55 additions & 3 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,7 @@ def convert_jsonl_to(
no_timestamps: bool = False,
no_recaps: bool = False,
force_regenerate: bool = False,
regenerated: Optional[list[bool]] = None,
) -> Path:
"""Convert JSONL transcript(s) to the specified format.

Expand All @@ -1730,6 +1731,15 @@ def convert_jsonl_to(
file at a user-chosen path was kept even when a different
transcript was requested — silent stale content. The tool's own
managed ``combined_transcripts*`` artifacts still use the skip.
regenerated: Optional out-parameter. When a list is passed, a single
bool is appended reporting whether ANY output was actually
(re)written — the combined transcript OR at least one individual
session file (True) — versus everything being current/skipped
(False). Lets the CLI avoid printing a misleading "Successfully
converted" line on a pure skip while still confirming per-session
work (e.g. ``--combined no``, which never writes a combined),
without changing the ``Path`` return contract that ~20 callers
rely on.
"""
if not input_path.exists():
raise FileNotFoundError(f"Input path not found: {input_path}")
Expand Down Expand Up @@ -1829,6 +1839,8 @@ def convert_jsonl_to(
f"All HTML files are current for {input_path.name}, "
"skipping regeneration"
)
if regenerated is not None:
regenerated.append(False)
return output_path

# Phase 2: Load messages (will use fresh cache when available)
Expand Down Expand Up @@ -1894,6 +1906,9 @@ def convert_jsonl_to(
# requested) are still produced by `_generate_individual_session_files`
# below. The function still returns `output_path` for the caller's
# index linking, but the file at that path is not (re-)written.
# Tracks whether the combined output was actually (re)written this call,
# reported via the `regenerated` out-parameter for the CLI's message.
did_regenerate = False
if not write_combined:
pass
elif use_pagination:
Expand Down Expand Up @@ -1932,6 +1947,7 @@ def convert_jsonl_to(
compact=compact,
no_recaps=no_recaps,
)
did_regenerate = True
else:
# Use single-file generation for small projects or filtered views
# Use incremental regeneration via html_cache when available
Expand All @@ -1949,16 +1965,42 @@ def convert_jsonl_to(
or not output_path.exists()
)
else:
# Fallback: old logic for single file mode or no cache
# Fallback: old logic for single file mode or no cache.
#
# is_outdated() only compares the embedded tool version, not the
# source's freshness, so a single .jsonl that grows between runs
# (e.g. an in-progress session re-exported with the same tool
# version) would be wrongly skipped as "current" and serve stale
# HTML (issue #221 follow-up). Mirror the directory path's
# source-tracking intent with an mtime check: regenerate when the
# source file is newer than the existing output. Scoped to a file
# source — a directory source has no single mtime and is handled
# by the cache branch / `cache_was_updated` above.
#
# The directory cache persists the *source* mtime in its DB to
# detect change; single-file mode has no DB, so the *output* file's
# own mtime is the natural, persistence-free basis. It is sufficient
# because the output is always written after its source is read, so
# `output.mtime >= source.mtime` holds post-write; a later append
# bumps the source past it. Both approaches share the same
# filesystem-mtime granularity limit (a sub-tick append racing the
# prior write resolves on the next run) — no worse than the cache.
source_is_newer = (
input_path.is_file()
and output_path.exists()
and input_path.stat().st_mtime > output_path.stat().st_mtime
)
should_regenerate = (
force_regenerate
or renderer.is_outdated(output_path)
or source_is_newer
or from_date is not None
or to_date is not None
or not output_path.exists()
or (input_path.is_dir() and cache_was_updated)
)

did_regenerate = should_regenerate
if should_regenerate:
# For referenced images, pass the output directory
output_dir = output_path.parent
Expand All @@ -1983,9 +2025,16 @@ def convert_jsonl_to(
f"{format.upper()} file {output_path.name} is current, skipping regeneration"
)

# Generate individual session files if requested and in directory mode
# Generate individual session files if requested and in directory mode.
# Its return count feeds `regenerated` below: per-session output is an
# independent axis from the combined write, so a run that rewrites session
# files while the combined stays current (or `--combined no`, which never
# writes a combined) still counts as work done — otherwise the CLI would
# fall silent on it (the combined-only flag is what made `--combined no`
# print nothing despite producing every session file).
sessions_regenerated = 0
if generate_individual_sessions and input_path.is_dir():
_generate_individual_session_files(
sessions_regenerated = _generate_individual_session_files(
format,
messages,
effective_output_dir,
Expand All @@ -2003,6 +2052,9 @@ def convert_jsonl_to(
no_recaps=no_recaps,
)

if regenerated is not None:
regenerated.append(did_regenerate or sessions_regenerated > 0)

return output_path


Expand Down
26 changes: 17 additions & 9 deletions test/test_html_regeneration.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@ def test_force_regeneration_with_cache_update(self, tmp_path):
assert "This should force regeneration despite same version" in new_content

def test_single_file_mode_regeneration_behavior(self, tmp_path):
"""Test that single file mode doesn't use cache but still respects version checks."""
"""Single file mode has no cache: it skips on an unchanged source
(version marker current) but regenerates when the source is newer."""
# Setup: Create a single JSONL file
test_data_dir = Path(__file__).parent / "test_data"
jsonl_file = tmp_path / "single_test.jsonl"
Expand All @@ -327,21 +328,28 @@ def test_single_file_mode_regeneration_behavior(self, tmp_path):
# Verify file wasn't regenerated (same mtime)
assert output_file.stat().st_mtime == original_mtime

# Modify file - should NOT auto-regenerate in single file mode because there's no cache
# Modify the source: even without a cache, a single file that grows
# (e.g. an in-progress session) MUST regenerate — the output is now
# older than its source (issue #221 follow-up). Previously single
# file mode wrongly skipped on the version marker alone.
time.sleep(0.1)
new_message = '{"type":"user","timestamp":"2025-07-03T16:40:00Z","parentUuid":null,"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session","version":"1.0.0","uuid":"single_file_msg","message":{"role":"user","content":[{"type":"text","text":"Single file mode test."}]}}\n'
with open(jsonl_file, "a", encoding="utf-8") as f:
f.write(new_message)

# Single file mode doesn't have cache, so it should still skip based on version
with patch("builtins.print") as mock_print:
convert_jsonl_to_html(jsonl_file)
mock_print.assert_any_call(
"HTML file single_test.html is current, skipping regeneration"
)

# Verify file wasn't regenerated (this is expected behavior for single file mode)
assert output_file.stat().st_mtime == original_mtime
# No "skipping" line: it regenerated because the source is newer.
skip_calls = [
c
for c in mock_print.call_args_list
if c.args and "skipping regeneration" in str(c.args[0])
]
assert not skip_calls, f"unexpected skip after source grew: {skip_calls}"

# The output was rewritten (newer mtime) and now contains the new message.
assert output_file.stat().st_mtime > original_mtime
assert "Single file mode test." in output_file.read_text(encoding="utf-8")
Comment on lines +331 to +352

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟡 Minor | ⚡ Quick win

Make this regeneration check coarse-timestamp safe.

This test only sleeps 0.1s before appending, but the production path keys off mtimes. On 1-second-granularity filesystems that can leave the source and HTML in the same tick, so this rerun still skips and stat().st_mtime > original_mtime flakes. test/test_output_explicit.py already avoids that by forcing the source mtime strictly newer with os.utime(...); this case should do the same.

🧰 Tools
🪛 ast-grep (0.44.0)

[warning] 336-336: File path is request-/variable-derived; validate and normalize to prevent path traversal.
Context: open(jsonl_file, "a", encoding="utf-8")
Note: [CWE-22] Improper Limitation of a Pathname to a Restricted Directory ('Path Traversal').

(open-filename-from-request)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/test_html_regeneration.py` around lines 331 - 352, The regeneration
assertion in this test is too dependent on `time.sleep(0.1)` and can flake on
coarse mtime filesystems; update the `convert_jsonl_to_html` setup so the source
file is explicitly made newer than the HTML output using `os.utime(...)`,
matching the approach already used in `test_output_explicit.py`. Keep the checks
around `skip_calls`, `output_file.stat().st_mtime`, and the `"Single file mode
test."` content, but ensure the test advances the source timestamp in a
filesystem-safe way before calling `convert_jsonl_to_html`.



class TestIncrementalHtmlCache:
Expand Down
86 changes: 86 additions & 0 deletions test/test_output_explicit.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,5 +205,91 @@ def test_nonexistent_path_is_outdated(self, tmp_path: Path):
assert MarkdownRenderer().is_outdated(missing) is True


class TestSingleFileStaleness:
"""#221 follow-up: the *default* single-file output (no ``-o``, detail-
suffixed naming) rides the version-only skip, so a source that grows
between runs was wrongly kept stale. And a genuine skip printed BOTH the
converter's "is current, skipping" line and the CLI's "Successfully
converted" line. These pin the fixes for the exact CLI repro:
``claude-code-log <file>.jsonl --detail low`` run, grow source, re-run."""

def test_grown_source_regenerates(self, tmp_path: Path):
src = _write_jsonl(tmp_path / "session.jsonl", "FIRST_turn_marker")
runner = CliRunner()

r1 = runner.invoke(main, [str(src), "--detail", "low"])
assert r1.exit_code == 0, r1.output
out = next(tmp_path.glob("*.low.html"))
assert "FIRST_turn_marker" in out.read_text(encoding="utf-8")
out_mtime = out.stat().st_mtime

# Source progresses (a real append, not a synthetic mtime bump), then
# forced strictly newer than the output so the check is robust on
# coarse-granularity filesystems.
with open(src, "a", encoding="utf-8") as f:
f.write(json.dumps(_user_entry("SECOND_turn_marker")) + "\n")
os.utime(src, (out_mtime + 2, out_mtime + 2))

r2 = runner.invoke(main, [str(src), "--detail", "low"])
assert r2.exit_code == 0, r2.output
assert "skipping regeneration" not in r2.output
second = out.read_text(encoding="utf-8")
assert "SECOND_turn_marker" in second, "stale HTML served after source grew"
assert out.stat().st_mtime > out_mtime

def test_unchanged_source_skips_without_success_line(self, tmp_path: Path):
src = _write_jsonl(tmp_path / "session.jsonl", "ONLY_turn_marker")
runner = CliRunner()

r1 = runner.invoke(main, [str(src), "--detail", "low"])
assert r1.exit_code == 0, r1.output
assert "Successfully converted" in r1.output

# Re-run with the source untouched: skip announced exactly once, and
# NOT also "Successfully converted" (the misleading double message).
r2 = runner.invoke(main, [str(src), "--detail", "low"])
assert r2.exit_code == 0, r2.output
assert "skipping regeneration" in r2.output
assert "Successfully converted" not in r2.output

def test_directory_skip_suppresses_success_line(self, tmp_path: Path):
"""Directory symmetry: a no-op directory re-run early-exits with a
skip line and must NOT also print 'Successfully combined ...' (the
same double-message fix applied to the directory branch)."""
proj = tmp_path / "proj"
proj.mkdir()
_write_jsonl(proj / "s.jsonl", "DIR_turn_marker")
runner = CliRunner()

r1 = runner.invoke(main, [str(proj)])
assert r1.exit_code == 0, r1.output
assert "Successfully combined" in r1.output

r2 = runner.invoke(main, [str(proj)])
assert r2.exit_code == 0, r2.output
assert "skipping regeneration" in r2.output
assert "Successfully combined" not in r2.output

def test_combined_no_confirms_session_generation(self, tmp_path: Path):
"""`--combined no` writes only per-session files (no combined). The
success line is gated on regeneration, and that gate must count
per-session output too — otherwise the run falls completely silent
despite writing every session file. Regression guard: the first
version of the skip-suppression tracked only the combined write."""
proj = tmp_path / "proj"
proj.mkdir()
_write_jsonl(proj / "aaaaaaaa.jsonl", "SESSION_A_marker")
runner = CliRunner()

r1 = runner.invoke(main, [str(proj), "--combined", "no"])
assert r1.exit_code == 0, r1.output
# Per-session files were produced...
assert list(proj.glob("session-*.html")), "no session files written"
# ...and the run confirms that work rather than printing nothing.
assert "individual session files" in r1.output, (
f"--combined no produced no confirmation; output was:\n{r1.output}"
)


if __name__ == "__main__":
pytest.main([__file__, "-v"])
Loading