Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions claude_code_log/discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Unified session discovery across all providers."""

from typing import Iterator, Optional

from .providers import discover_providers
from .providers.base import SessionInfo


def discover_all_sessions(
    providers: Optional[list[str]] = None,
) -> Iterator[SessionInfo]:
    """Stream sessions from every requested provider, one provider at a time.

    Args:
        providers: Names of the providers to query. When None, the
            registry's list of available providers is used instead.

    Yields:
        SessionInfo objects, in the order the providers are listed.
    """
    registry = discover_providers()
    selected = registry.get_available_providers() if providers is None else providers

    for name in selected:
        candidate = registry.get_provider(name)
        # Names the registry cannot resolve, and providers that report
        # themselves unavailable, are skipped without raising.
        if not candidate or not candidate.is_available():
            continue
        yield from candidate.discover_sessions()


def discover_sessions_by_provider(provider_name: str) -> Iterator[SessionInfo]:
    """Stream the sessions of a single provider via the registry.

    Args:
        provider_name: Name of the provider whose sessions are wanted.

    Yields:
        SessionInfo objects produced by the registry for that provider.
    """
    # This is a generator: the registry is only built once iteration starts.
    yield from discover_providers().discover_sessions_by_provider(provider_name)


def get_session_stats() -> dict[str, int]:
    """Count the discoverable sessions of each available provider.

    Returns:
        Mapping of provider name to the number of sessions that provider
        yields. Names for which the registry returns no provider are
        left out of the mapping.
    """
    registry = discover_providers()
    counts: dict[str, int] = {}

    for name in registry.get_available_providers():
        source = registry.get_provider(name)
        if not source:
            continue
        # Walk the iterator only to count it; no SessionInfo is retained.
        total = 0
        for _ in source.discover_sessions():
            total += 1
        counts[name] = total

    return counts


def load_session(provider_name: str, session_id: str):
    """Fetch one session's transcript through the provider registry.

    Args:
        provider_name: Name of the provider that owns the session.
        session_id: ID of the session to load.

    Returns:
        Iterator of TranscriptEntry objects, as returned by the
        registry's ``load_session``.
    """
    return discover_providers().load_session(provider_name, session_id)
11 changes: 11 additions & 0 deletions claude_code_log/providers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Provider abstraction layer for multi-provider session support."""

from .base import BaseProvider, SessionInfo
from .registry import ProviderRegistry, discover_providers

# Names re-exported as the package's public API (and by `import *`).
__all__ = [
    "BaseProvider",
    "SessionInfo",
    "ProviderRegistry",
    "discover_providers",
]
226 changes: 226 additions & 0 deletions claude_code_log/providers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
"""Abstract base class for session providers."""

from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Iterator, Optional, cast

from claude_code_log.models import (
AssistantMessageModel,
AssistantTranscriptEntry,
TextContent,
ThinkingContent,
ToolUseContent,
TranscriptEntry,
UserMessageModel,
UserTranscriptEntry,
)


@dataclass
class SessionInfo:
    """Summary record describing one session reported by a provider.

    Only ``provider`` and ``session_id`` are required; every other field
    has a default and may be left unset by a provider.
    """

    # Name of the provider that produced this record (e.g. "claude").
    provider: str
    # Identifier of the session within that provider.
    session_id: str
    # Optional human-readable title; None when not supplied.
    title: Optional[str] = None
    # Timestamps carried as strings — presumably ISO 8601; TODO confirm
    # that every provider uses the same format.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    # Optional filesystem path associated with the session's project.
    project_path: Optional[Path] = None
    # Counters; both default to 0 when a provider does not fill them in.
    message_count: int = 0
    total_tokens: int = 0


def extract_text(content: Any) -> str:
    """Flatten message content of varying shape into a single string.

    Args:
        content: A string, a list of strings and/or dicts, or any other
            object.

    Returns:
        * ``content`` itself when it is already a string;
        * for a list, the newline-joined pieces: each dict contributes
          ``str()`` of its ``"text"`` value (an empty string when the key
          is absent), each string contributes itself, and items of any
          other type are ignored;
        * ``str(content)`` for everything else.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    fragments: list[str] = []
    for entry in cast(list[Any], content):
        if isinstance(entry, dict):
            mapping = cast(dict[str, Any], entry)
            fragments.append(str(mapping.get("text", "")))
        elif isinstance(entry, str):
            fragments.append(entry)
    return "\n".join(fragments)


def file_mtime_iso(path: Path) -> str:
    """Return the file's last-modification time as an ISO-format string.

    The timestamp is converted with ``datetime.fromtimestamp`` without a
    timezone argument, so the result is a naive local-time value.
    """
    modified_epoch = path.stat().st_mtime
    modified = datetime.fromtimestamp(modified_epoch)
    return modified.isoformat()


def make_user_entry(
    session_id: str,
    uuid: str,
    timestamp: str,
    content: Any,
) -> UserTranscriptEntry:
    """Build a user transcript entry holding a single text block.

    Args:
        session_id: Session the entry belongs to.
        uuid: Identifier assigned to the entry.
        timestamp: Timestamp string stored on the entry.
        content: Raw message content; it is flattened to plain text with
            ``extract_text`` before being stored.

    Returns:
        A UserTranscriptEntry with no parent, not on a sidechain, user
        type "external", and empty ``cwd`` / ``version`` fields.
    """
    text_block = TextContent(type="text", text=extract_text(content))
    message = UserMessageModel(role="user", content=[text_block])
    return UserTranscriptEntry(
        type="user",
        parentUuid=None,
        isSidechain=False,
        userType="external",
        cwd="",
        sessionId=session_id,
        version="",
        uuid=uuid,
        timestamp=timestamp,
        message=message,
    )


def make_tool_result_entry(
    session_id: str,
    uuid: str,
    timestamp: str,
    tool_use_id: str,
    content: str,
) -> UserTranscriptEntry:
    """Build a user transcript entry that carries one tool result.

    Args:
        session_id: Session the entry belongs to.
        uuid: Identifier assigned to the entry.
        timestamp: Timestamp string stored on the entry.
        tool_use_id: ID of the tool call this result answers.
        content: Result text, stored unchanged.

    Returns:
        A UserTranscriptEntry whose message content is a single
        ToolResultContent block; the remaining fields use the same fixed
        placeholder values as the other entry builders in this module.
    """
    from claude_code_log.models import ToolResultContent

    result_block = ToolResultContent(
        type="tool_result",
        tool_use_id=tool_use_id,
        content=content,
    )
    message = UserMessageModel(role="user", content=[result_block])
    return UserTranscriptEntry(
        type="user",
        parentUuid=None,
        isSidechain=False,
        userType="external",
        cwd="",
        sessionId=session_id,
        version="",
        uuid=uuid,
        timestamp=timestamp,
        message=message,
    )


def make_assistant_entry(
    session_id: str,
    uuid: str,
    timestamp: str,
    model: str,
    content: Any,
) -> AssistantTranscriptEntry:
    """Build an assistant transcript entry from arbitrary content.

    Args:
        session_id: Session the entry belongs to.
        uuid: Identifier assigned to the entry; also used as the message id.
        timestamp: Timestamp string stored on the entry.
        model: Model name recorded on the message.
        content: Either a list, which is passed through as the message
            content unchanged, or any other value, which is converted
            with ``str()`` and wrapped in a single text block.

    Returns:
        An AssistantTranscriptEntry with no parent, not on a sidechain,
        user type "external", and empty ``cwd`` / ``version`` fields.
    """
    if isinstance(content, list):
        blocks: list[Any] = cast(list[Any], content)
    else:
        blocks = [TextContent(type="text", text=str(content))]

    message = AssistantMessageModel(
        id=uuid,
        type="message",
        role="assistant",
        model=model,
        content=blocks,
    )
    return AssistantTranscriptEntry(
        type="assistant",
        parentUuid=None,
        isSidechain=False,
        userType="external",
        cwd="",
        sessionId=session_id,
        version="",
        uuid=uuid,
        timestamp=timestamp,
        message=message,
    )


def make_thinking_entry(
    session_id: str,
    uuid: str,
    timestamp: str,
    model: str,
    text: str,
) -> AssistantTranscriptEntry:
    """Build an assistant transcript entry holding one thinking block.

    Args:
        session_id: Session the entry belongs to.
        uuid: Identifier assigned to the entry; also used as the message id.
        timestamp: Timestamp string stored on the entry.
        model: Model name recorded on the message.
        text: Thinking text, stored unchanged.

    Returns:
        An AssistantTranscriptEntry whose message content is a single
        ThinkingContent block.
    """
    thinking_block = ThinkingContent(type="thinking", thinking=text)
    message = AssistantMessageModel(
        id=uuid,
        type="message",
        role="assistant",
        model=model,
        content=[thinking_block],
    )
    return AssistantTranscriptEntry(
        type="assistant",
        parentUuid=None,
        isSidechain=False,
        userType="external",
        cwd="",
        sessionId=session_id,
        version="",
        uuid=uuid,
        timestamp=timestamp,
        message=message,
    )


def make_tool_use_entry(
    session_id: str,
    uuid: str,
    timestamp: str,
    model: str,
    tool_id: str,
    tool_name: str,
    tool_input: Any,
) -> AssistantTranscriptEntry:
    """Build an assistant transcript entry describing one tool call.

    Args:
        session_id: Session the entry belongs to.
        uuid: Identifier assigned to the entry; also used as the message id.
        timestamp: Timestamp string stored on the entry.
        model: Model name recorded on the message.
        tool_id: ID of the tool call.
        tool_name: Name of the tool being invoked.
        tool_input: Input passed to the tool, stored unchanged.

    Returns:
        An AssistantTranscriptEntry whose message content is a single
        ToolUseContent block.
    """
    call_block = ToolUseContent(
        type="tool_use",
        id=tool_id,
        name=tool_name,
        input=tool_input,
    )
    message = AssistantMessageModel(
        id=uuid,
        type="message",
        role="assistant",
        model=model,
        content=[call_block],
    )
    return AssistantTranscriptEntry(
        type="assistant",
        parentUuid=None,
        isSidechain=False,
        userType="external",
        cwd="",
        sessionId=session_id,
        version="",
        uuid=uuid,
        timestamp=timestamp,
        message=message,
    )


class BaseProvider(ABC):
    """Abstract interface that every session provider implements.

    Subclasses supply the five abstract methods; ``is_available`` and
    ``get_session_stats`` come with default implementations.
    """

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the provider's name as a string."""
        ...

    @abstractmethod
    def get_session_format(self) -> str:
        """Return a string naming the provider's session format."""
        ...

    @abstractmethod
    def get_data_dir(self) -> Optional[Path]:
        """Return the provider's data directory, or None if it has none."""
        ...

    @abstractmethod
    def discover_sessions(self) -> Iterator[SessionInfo]:
        """Yield a SessionInfo for each session the provider can find."""
        ...

    @abstractmethod
    def load_session(
        self, session_id: str, max_messages: Optional[int] = None
    ) -> Iterator[TranscriptEntry]:
        """Return the transcript entries of the session ``session_id``.

        ``max_messages`` is an optional cap; how it is applied is left to
        the implementing provider.
        """
        ...

    def is_available(self) -> bool:
        """Report whether the provider has a data directory that exists."""
        location = self.get_data_dir()
        if location is None:
            return False
        return location.exists()

    def get_session_stats(self, session_id: str) -> dict[str, Any]:
        """Return statistics for one session; the default is an empty dict."""
        return {}
56 changes: 56 additions & 0 deletions claude_code_log/providers/claude.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Claude Code session provider."""

from pathlib import Path
from typing import Iterator, Optional

from claude_code_log.models import TranscriptEntry

from .base import BaseProvider, SessionInfo, file_mtime_iso


class ClaudeProvider(BaseProvider):
    """Session provider for Claude Code transcripts stored as JSONL files.

    Sessions are looked up as ``<session_id>.jsonl`` files inside the
    per-project subdirectories of ``~/.claude/projects``.
    """

    def get_provider_name(self) -> str:
        """Return the provider name, ``"claude"``."""
        return "claude"

    def get_session_format(self) -> str:
        """Return the session format name, ``"jsonl"``."""
        return "jsonl"

    def get_data_dir(self) -> Optional[Path]:
        """Return ``~/.claude/projects`` if it exists, otherwise None."""
        data_dir = Path.home() / ".claude" / "projects"
        return data_dir if data_dir.exists() else None

    def discover_sessions(self) -> Iterator[SessionInfo]:
        """Yield one SessionInfo per session file in every project directory.

        Files whose name starts with ``agent-`` are skipped. Nothing is
        yielded when the data directory does not exist.
        """
        data_dir = self.get_data_dir()
        if data_dir is None:
            return

        for project_dir in data_dir.iterdir():
            if not project_dir.is_dir():
                continue
            for jsonl_file in project_dir.glob("*.jsonl"):
                if jsonl_file.name.startswith("agent-"):
                    continue
                yield SessionInfo(
                    provider="claude",
                    session_id=jsonl_file.stem,
                    project_path=project_dir,
                    # NOTE(review): this is the file's modification time,
                    # used here as a stand-in for the creation time.
                    created_at=file_mtime_iso(jsonl_file),
                )

    def load_session(
        self, session_id: str, max_messages: Optional[int] = None
    ) -> Iterator[TranscriptEntry]:
        """Load the transcript of ``session_id`` from the first project that has it.

        Args:
            session_id: Stem of the ``.jsonl`` file to load.
            max_messages: Optional cap on the number of entries returned.
                When given, only the first ``max_messages`` entries are
                produced; None returns the whole transcript. Negative
                values are treated as 0.

        Returns:
            Iterator over the session's transcript entries.

        Raises:
            ValueError: If the Claude data directory does not exist.
            FileNotFoundError: If no project directory contains the session.
        """
        from itertools import islice

        from claude_code_log.converter import load_transcript

        data_dir = self.get_data_dir()
        if data_dir is None:
            raise ValueError("Claude data directory not found")

        for project_dir in data_dir.iterdir():
            if not project_dir.is_dir():
                continue
            jsonl_file = project_dir / f"{session_id}.jsonl"
            if jsonl_file.exists():
                entries = iter(load_transcript(jsonl_file))
                if max_messages is None:
                    return entries
                # Enforce the cap here so the parameter is no longer a no-op.
                # islice rejects negative stops, hence the clamp to 0.
                # NOTE(review): load_transcript may still parse the whole
                # file; this only bounds what callers receive — confirm
                # whether the loader itself can take a limit.
                return islice(entries, max(max_messages, 0))

        raise FileNotFoundError(f"Session {session_id} not found")
Comment on lines +40 to +56

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Inspect load_transcript signature/return type
fd -t f 'converter.py' claude_code_log
ast-grep run --pattern 'def load_transcript($$$):
  $$$' --lang python $(fd -t f 'converter.py' claude_code_log)

Repository: daaain/claude-code-log

Length of output: 189


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== outline converter.py =="
ast-grep outline claude_code_log/converter.py --view expanded || true

echo
echo "== search load_transcript / max_messages usages =="
rg -n "load_transcript|max_messages" claude_code_log

Repository: daaain/claude-code-log

Length of output: 4261


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== read converter.py relevant section =="
wc -l claude_code_log/converter.py
sed -n '1,260p' claude_code_log/converter.py

echo
echo "== read provider/registry relevant sections =="
rg -n "def load_session|max_messages|load_transcript" claude_code_log/providers claude_code_log | sed -n '1,200p'

Repository: daaain/claude-code-log

Length of output: 12573


🏁 Script executed:

#!/bin/bash
set -euo pipefail

sed -n '1,260p' claude_code_log/converter.py
echo
sed -n '1,220p' claude_code_log/providers/claude.py
echo
rg -n "max_messages|load_session\\(" claude_code_log

Repository: daaain/claude-code-log

Length of output: 12196


🏁 Script executed:

#!/bin/bash
set -euo pipefail

sed -n '1,260p' claude_code_log/converter.py
echo
sed -n '1,220p' claude_code_log/providers/claude.py
echo
rg -n "max_messages|load_session\\(" claude_code_log

Repository: daaain/claude-code-log

Length of output: 12196


`max_messages` needs to be enforced in `claude_code_log/providers/claude.py::load_session`

`max_messages` is accepted and forwarded by the registry, but this method always returns the full result of `load_transcript(jsonl_file)`. That leaves large sessions unbounded and makes the cap parameter a no-op. Either pass the limit through to the transcript loader or stop reading once the cap is reached.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@claude_code_log/providers/claude.py` around lines 40 - 56, The load_session
method in claude_code_log/providers/claude.py ignores the max_messages parameter
and always returns the full transcript from load_transcript(jsonl_file). Update
load_session to enforce the cap by passing max_messages through to the
transcript loading path if supported, or by truncating iteration after
max_messages entries before returning. Keep the change localized to load_session
and any directly related transcript-loading helper it calls.

Loading