From ee2f6dcb10272ea1c55a8a1fc5108e47c0e36212 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:28:26 +0100 Subject: [PATCH 1/5] Add `ask` command for natural-language search across meeting notes Two-stage LLM pipeline: first identifies relevant meetings from summaries, then answers from full transcripts with quote verification. Includes spinner UX, meeting source citation in answers, keyword fallback, and context_size config option. --- README.md | 19 + src/ownscribe/cli.py | 13 + src/ownscribe/config.py | 2 + src/ownscribe/pipeline.py | 14 +- src/ownscribe/search.py | 517 +++++++++++++++ src/ownscribe/summarization/__init__.py | 19 + src/ownscribe/summarization/base.py | 4 + .../summarization/ollama_summarizer.py | 14 + .../summarization/openai_summarizer.py | 36 ++ src/ownscribe/summarization/prompts.py | 35 + tests/test_pipeline.py | 4 +- tests/test_search.py | 605 ++++++++++++++++++ 12 files changed, 1269 insertions(+), 13 deletions(-) create mode 100644 src/ownscribe/search.py create mode 100644 tests/test_search.py diff --git a/README.md b/README.md index fcc5c3b..48f3269 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ All audio, transcripts, and summaries remain local. 
- **Pipeline progress** — live checklist showing transcription, diarization sub-steps, and summarization progress - **Local LLM summarization** — structured meeting notes via Ollama, LM Studio, or any OpenAI-compatible server - **Summarization templates** — built-in presets for meetings, lectures, and quick briefs; define your own in config +- **Ask your meetings** — ask natural-language questions across all your meeting notes; uses a two-stage LLM pipeline with keyword fallback - **One command** — just run `ownscribe`, press Ctrl+C when done, get transcript + summary ## Requirements @@ -118,10 +119,27 @@ ownscribe devices # list audio devices (uses native CoreAudio w ownscribe apps # list running apps with PIDs for use with --pid ownscribe transcribe recording.wav # transcribe an existing audio file ownscribe summarize transcript.md # summarize an existing transcript +ownscribe ask "question" # search your meetings with a natural-language question ownscribe config # open config file in $EDITOR ownscribe cleanup # remove ownscribe data from disk ``` +### Searching Meeting Notes + +Use `ask` to search across all your meeting notes with natural-language questions: + +```bash +ownscribe ask "What did Anna say about the deadline?" +ownscribe ask "budget decisions" --since 2026-01-01 +ownscribe ask "action items from last week" --limit 5 +``` + +This runs a two-stage pipeline: +1. **Find** — sends meeting summaries to the LLM to identify which meetings are relevant +2. **Answer** — sends the full transcripts of relevant meetings to the LLM to produce an answer with quotes + +If the LLM finds no relevant meetings, a keyword fallback searches summaries and transcripts directly. + ## Configuration Config is stored at `~/.config/ownscribe/config.toml`. Run `ownscribe config` to create and edit it. 
@@ -149,6 +167,7 @@ backend = "ollama" # "ollama" or "openai" model = "mistral" host = "http://localhost:11434" # template = "meeting" # "meeting", "lecture", "brief", or a custom name +# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends # Custom templates (optional): # [templates.my-standup] diff --git a/src/ownscribe/cli.py b/src/ownscribe/cli.py index 899f3f8..8fc6a2c 100644 --- a/src/ownscribe/cli.py +++ b/src/ownscribe/cli.py @@ -97,6 +97,19 @@ def cli( run_pipeline(config) +@cli.command() +@click.argument("question") +@click.option("--since", default=None, help="Only search meetings after this date (YYYY-MM-DD).") +@click.option("--limit", default=None, type=int, help="Max number of recent meetings to search.") +@click.pass_context +def ask(ctx: click.Context, question: str, since: str | None, limit: int | None) -> None: + """Ask a question across your meeting notes.""" + config = ctx.obj["config"] + from ownscribe.search import ask as run_ask + + run_ask(config, question, since=since, limit=limit) + + @cli.command() def devices() -> None: """List available audio input devices.""" diff --git a/src/ownscribe/config.py b/src/ownscribe/config.py index ba9df2a..4ca4c50 100644 --- a/src/ownscribe/config.py +++ b/src/ownscribe/config.py @@ -35,6 +35,7 @@ model = "mistral" # model name host = "http://localhost:11434" # ollama: :11434, LM Studio: :1234 # template = "meeting" # built-in: "meeting", "lecture", or "brief" +# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends # Custom templates (optional): # [templates.my-notes] @@ -79,6 +80,7 @@ class SummarizationConfig: model: str = "mistral" host: str = "http://localhost:11434" template: str = "" + context_size: int = 0 @dataclass diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index aaaefc6..5664d41 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -17,6 +17,7 @@ from ownscribe.config import 
Config from ownscribe.progress import PipelineProgress, Spinner +from ownscribe.summarization import create_summarizer # A standard WAV file header (RIFF + fmt + data chunk header) is 44 bytes. # Files at or below this size contain no audio frames. @@ -87,15 +88,6 @@ def _create_transcriber(config: Config, progress=None): return WhisperXTranscriber(config.transcription, diar_config, progress=progress) -def _create_summarizer(config: Config): - """Create the appropriate summarizer based on config.""" - if config.summarization.backend == "openai": - from ownscribe.summarization.openai_summarizer import OpenAISummarizer - return OpenAISummarizer(config.summarization, config.templates) - else: - from ownscribe.summarization.ollama_summarizer import OllamaSummarizer - return OllamaSummarizer(config.summarization, config.templates) - def _format_output(config: Config, transcript_result, summary_text: str | None = None) -> tuple[str, str | None]: """Format transcript and optional summary. Returns (transcript_str, summary_str).""" @@ -233,7 +225,7 @@ def run_summarize(config: Config, transcript_file: str) -> None: """Summarize a transcript file.""" transcript_text = Path(transcript_file).read_text() - summarizer = _create_summarizer(config) + summarizer = create_summarizer(config) if not summarizer.is_available(): click.echo( f"Error: {config.summarization.backend} is not reachable at {config.summarization.host}. " @@ -295,7 +287,7 @@ def _do_transcribe_and_summarize( # 3. 
Summarize if sum_enabled: - summarizer = _create_summarizer(config) + summarizer = create_summarizer(config) if not summarizer.is_available(): click.echo( f"\nWarning: {config.summarization.backend} is not reachable " diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py new file mode 100644 index 0000000..33e29fd --- /dev/null +++ b/src/ownscribe/search.py @@ -0,0 +1,517 @@ +"""Semantic search over meeting notes using a two-stage LLM pipeline.""" + +from __future__ import annotations + +import json +import re +from datetime import date +from pathlib import Path +from typing import NamedTuple + +import click + +from ownscribe.config import Config +from ownscribe.progress import Spinner +from ownscribe.summarization import create_summarizer +from ownscribe.summarization.base import Summarizer +from ownscribe.summarization.prompts import ( + SEARCH_ANSWER_PROMPT, + SEARCH_ANSWER_SYSTEM, + SEARCH_FIND_PROMPT, + SEARCH_FIND_SYSTEM, +) + +_DEFAULT_CONTEXT_SIZE = 8192 + + +class Meeting(NamedTuple): + folder_name: str + display_name: str + summary_path: Path + transcript_path: Path | None + + +def ask(config: Config, question: str, since: str | None, limit: int | None) -> None: + """Run a two-stage search over meeting notes and print the answer.""" + output_dir = config.output.resolved_dir + meetings, skipped = _discover_meetings(output_dir, since=since, limit=limit) + + if not meetings: + click.echo("No meetings found.") + return + + if skipped: + click.echo(f"({skipped} meetings without summaries were skipped)") + + summarizer = create_summarizer(config) + context_size = _resolve_context_size(config) + + # Stage 1 + label = f"Searching {len(meetings)} meetings" + with Spinner(label) as spinner: + relevant = _find_relevant_meetings( + summarizer, question, meetings, context_size, spinner=spinner, + ) + spinner.update(label) # restore label so exit message is clean + + if not relevant: + click.echo("No relevant meetings found for your question.") + return + + 
click.echo(f"Found {len(relevant)} relevant meetings:") + for m in relevant: + click.echo(f" - {m.display_name}") + + # Stage 2 + with Spinner("Analyzing transcripts"): + answer, skipped_transcripts = _answer_from_transcripts(summarizer, question, relevant, context_size) + answer = _verify_quotes(answer, _load_transcripts(relevant)) + + if skipped_transcripts: + click.echo(f"({skipped_transcripts} transcripts did not fit within context budget, they were skipped)") + + click.echo(answer) + + + +def _resolve_context_size(config: Config) -> int: + if config.summarization.context_size > 0: + return config.summarization.context_size + + if config.summarization.backend == "ollama": + try: + import ollama + + client = ollama.Client(host=config.summarization.host) + info = client.show(config.summarization.model) + # Ollama returns model info with context window details + model_info = info.get("model_info", {}) + for key, value in model_info.items(): + if "context_length" in key: + return int(value) + except Exception: + pass + + return _DEFAULT_CONTEXT_SIZE + + +# -- Discovery -- + + +_FOLDER_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})_(\d{2})(\d{2})(?:_(.+))?$") + + +def _parse_folder_name(name: str) -> tuple[str, str] | None: + """Parse a folder name like '2026-02-13_1501_quarterly-planning'. + + Also accepts folders without a slug, e.g. '2026-02-16_1433'. + Returns (date_str, display_name) or None if the name doesn't match. 
+ """ + m = _FOLDER_RE.match(name) + if not m: + return None + date_str = m.group(1) + hour, minute = m.group(2), m.group(3) + slug = m.group(4) + if slug: + title = slug.replace("-", " ").title() + return date_str, f"{date_str} {hour}:{minute} — {title}" + return date_str, f"{date_str} {hour}:{minute}" + + +def _discover_meetings( + output_dir: Path, since: str | None, limit: int | None, +) -> tuple[list[Meeting], int]: + if not output_dir.exists(): + return [], 0 + + meetings: list[Meeting] = [] + skipped = 0 + + for folder in sorted(output_dir.iterdir(), reverse=True): + if not folder.is_dir(): + continue + parsed = _parse_folder_name(folder.name) + if parsed is None: + continue + + date_str, display_name = parsed + + # Find summary file + summary_path = None + for ext in ("summary.md", "summary.json"): + candidate = folder / ext + if candidate.exists(): + summary_path = candidate + break + + if summary_path is None: + skipped += 1 + continue + + # Find transcript file + transcript_path = None + for ext in ("transcript.md", "transcript.json"): + candidate = folder / ext + if candidate.exists(): + transcript_path = candidate + break + + # Apply --since filter + if since: + try: + since_date = date.fromisoformat(since) + meeting_date = date.fromisoformat(date_str) + if meeting_date < since_date: + continue + except ValueError: + pass + + meetings.append(Meeting(folder.name, display_name, summary_path, transcript_path)) + + # Apply --limit cap (meetings are already newest-first) + if limit is not None and limit > 0: + meetings = meetings[:limit] + + return meetings, skipped + + +# -- Token estimation -- + + +def _estimate_tokens(text: str) -> int: + return len(text) // 4 + + +# -- Chunking -- + + +def _build_summary_chunks( + meetings: list[Meeting], context_budget: int, +) -> list[list[Meeting]]: + effective = int(context_budget * 0.8) + overhead = 1000 # system prompt + question + response headroom + budget = max(effective - overhead, 500) + + chunks: 
list[list[Meeting]] = [] + current_chunk: list[Meeting] = [] + current_size = 0 + + for m in meetings: + summary_text = m.summary_path.read_text() + header = f"## [{m.folder_name}]\n" + entry_tokens = _estimate_tokens(header + summary_text) + + if current_chunk and current_size + entry_tokens > budget: + chunks.append(current_chunk) + current_chunk = [] + current_size = 0 + + current_chunk.append(m) + current_size += entry_tokens + + if current_chunk: + chunks.append(current_chunk) + + return chunks + + +# -- Stage 1: Find relevant meetings -- + + +_JSON_RE = re.compile(r'\{[^{}]*"relevant"[^{}]*\}', re.DOTALL) +_ARRAY_RE = re.compile(r'\[.*?\]', re.DOTALL) + + +def _parse_relevant_ids(response: str) -> list[str] | None: + """Extract meeting IDs from a Stage 1 LLM response. + + Returns a list of IDs, or None if parsing fails entirely. + """ + # Try direct JSON parse + try: + data = json.loads(response) + if isinstance(data, dict) and "relevant" in data: + ids = data["relevant"] + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + # Fallback: regex for JSON object with "relevant" + m = _JSON_RE.search(response) + if m: + try: + data = json.loads(m.group()) + if isinstance(data, dict) and "relevant" in data: + ids = data["relevant"] + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + # Fallback: bare array + m = _ARRAY_RE.search(response) + if m: + try: + ids = json.loads(m.group()) + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + return None + + +_STOP_WORDS = frozenset([ + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "shall", + "should", "may", "might", "can", "could", "of", "in", "to", "for", "on", + "with", "at", "by", "from", "about", "into", "through", "during", "before", + "after", "above", 
"below", "between", "out", "off", "over", "under", + "again", "further", "then", "once", "here", "there", "when", "where", + "why", "how", "all", "each", "every", "both", "few", "more", "most", + "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", + "than", "too", "very", "and", "but", "or", "if", "what", "which", "who", + "whom", "this", "that", "these", "those", "i", "me", "my", "myself", "we", + "our", "ours", "ourselves", "you", "your", "yours", "yourself", + "yourselves", "he", "him", "his", "himself", "she", "her", "hers", + "herself", "it", "its", "itself", "they", "them", "their", "theirs", + "themselves", "am", "s", "t", "d", "ll", "ve", "re", +]) + + +def _extract_keywords(text: str) -> set[str]: + """Extract lowercase keywords from *text*, filtering stop words.""" + words = set(re.findall(r"[a-z]+", text.lower())) + filtered = words - _STOP_WORDS + return filtered if filtered else words + + +def _keyword_fallback( + question: str, meetings: list[Meeting], +) -> list[Meeting]: + """Return meetings whose summary or transcript shares keywords with *question*.""" + kw = _extract_keywords(question) + if not kw: + return [] + + matches: list[Meeting] = [] + for m in meetings: + summary_text = m.summary_path.read_text() + summary_words = set(re.findall(r"[a-z]+", summary_text.lower())) + if kw & summary_words: + matches.append(m) + continue + if m.transcript_path and m.transcript_path.exists(): + transcript_text = m.transcript_path.read_text() + transcript_words = set(re.findall(r"[a-z]+", transcript_text.lower())) + if kw & transcript_words: + matches.append(m) + return matches + + +def _find_relevant_meetings( + summarizer: Summarizer, + question: str, + meetings: list[Meeting], + context_size: int, + spinner: Spinner | None = None, +) -> list[Meeting]: + chunks = _build_summary_chunks(meetings, context_size) + all_relevant_ids: set[str] = set() + total_chunks = len(chunks) + + for i, chunk in enumerate(chunks): + if total_chunks > 1 and 
spinner is not None: + spinner.update(f"Searching meetings (batch {i + 1}/{total_chunks})") + + known_ids = {m.folder_name for m in chunk} + + # Build summaries text + summaries_parts = [] + for m in chunk: + summary_text = m.summary_path.read_text() + summaries_parts.append(f"## [{m.folder_name}]\n{summary_text}") + summaries = "\n\n".join(summaries_parts) + + prompt = SEARCH_FIND_PROMPT.format(question=question, summaries=summaries) + response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True) + + ids = _parse_relevant_ids(response) + if ids is None: + # Total fallback: include all from this chunk + all_relevant_ids.update(known_ids) + else: + # Validate: only keep IDs that exist in this chunk + all_relevant_ids.update(id_ for id_ in ids if id_ in known_ids) + + if not all_relevant_ids: + keyword_matches = _keyword_fallback(question, meetings) + if keyword_matches: + if spinner is not None: + spinner.update("Falling back to keyword search") + return _rank_meetings(question, keyword_matches) + + # Build result list preserving order, then rank + meeting_by_id = {m.folder_name: m for m in meetings} + relevant = [meeting_by_id[id_] for id_ in all_relevant_ids if id_ in meeting_by_id] + return _rank_meetings(question, relevant) + + +# -- Ranking -- + + +def _rank_meetings(question: str, meetings: list[Meeting]) -> list[Meeting]: + """Rank meetings by keyword overlap, speaker mention, and recency.""" + question_words = set(question.lower().split()) + + def score(m: Meeting) -> tuple[float, float, str]: + summary_text = m.summary_path.read_text().lower() + summary_words = set(summary_text.split()) + + # Keyword overlap + overlap = len(question_words & summary_words) + + # Speaker boost: if a capitalized word in the question appears in the summary + speaker_boost = 0.0 + for word in question.split(): + if word[0:1].isupper() and len(word) > 1 and word.lower() in summary_text: + speaker_boost = 2.0 + break + + # Recency: folder name sorts lexicographically by 
date + return (overlap + speaker_boost, speaker_boost, m.folder_name) + + return sorted(meetings, key=score, reverse=True) + + +# -- Stage 2: Answer from transcripts -- + + +def _load_transcripts(meetings: list[Meeting]) -> dict[str, str]: + """Load transcript text keyed by folder name.""" + transcripts: dict[str, str] = {} + for m in meetings: + if m.transcript_path and m.transcript_path.exists(): + transcripts[m.folder_name] = m.transcript_path.read_text() + return transcripts + + +def _answer_from_transcripts( + summarizer: Summarizer, + question: str, + meetings: list[Meeting], + context_size: int, +) -> tuple[str, int]: + effective = int(context_size * 0.8) + overhead = 1500 # system prompt + question + response headroom + budget = max(effective - overhead, 500) + + transcript_parts: list[str] = [] + used_tokens = 0 + included = 0 + skipped = 0 + + for m in meetings: + if m.transcript_path is None or not m.transcript_path.exists(): + skipped += 1 + continue + + text = m.transcript_path.read_text() + entry = f"## [{m.folder_name}] {m.display_name}\n{text}" + entry_tokens = _estimate_tokens(entry) + + if used_tokens + entry_tokens > budget: + skipped += 1 + continue + + transcript_parts.append(entry) + used_tokens += entry_tokens + included += 1 + + if not transcript_parts: + return "No transcript text available for the relevant meetings.", skipped + + transcripts_text = "\n\n".join(transcript_parts) + prompt = SEARCH_ANSWER_PROMPT.format(question=question, transcripts=transcripts_text) + return summarizer.chat(SEARCH_ANSWER_SYSTEM, prompt), skipped + + +# -- Quote verification -- + + +def _extract_quotes(text: str) -> list[str]: + """Extract quoted text from the answer (> blockquotes and "..." 
quotes).""" + quotes: list[str] = [] + + # Blockquotes: lines starting with > + blockquote_lines: list[str] = [] + for line in text.split("\n"): + stripped = line.strip() + if stripped.startswith(">"): + blockquote_lines.append(stripped.lstrip("> ").strip()) + else: + if blockquote_lines: + quotes.append(" ".join(blockquote_lines)) + blockquote_lines = [] + if blockquote_lines: + quotes.append(" ".join(blockquote_lines)) + + # Inline quotes: text in "..." + for m in re.finditer(r'"([^"]{20,})"', text): + quotes.append(m.group(1)) + + return quotes + + +def _key_phrases(quote: str, min_words: int = 8, max_words: int = 12) -> list[str]: + """Extract key phrases (8-12 word spans) from a quote for verification.""" + words = quote.split() + if len(words) <= max_words: + return [" ".join(words)] if len(words) >= 4 else [] + + phrases = [] + # Take a few spans spread across the quote + step = max(1, (len(words) - min_words) // 3) + for start in range(0, len(words) - min_words + 1, step): + end = min(start + max_words, len(words)) + phrases.append(" ".join(words[start:end])) + if len(phrases) >= 3: + break + return phrases + + +def _verify_quotes(answer: str, transcripts: dict[str, str]) -> str: + """Check quotes against transcripts and mark unverified ones.""" + if not transcripts: + return answer + + all_text = "\n".join(transcripts.values()).lower() + quotes = _extract_quotes(answer) + + unverified_quotes: set[str] = set() + for quote in quotes: + phrases = _key_phrases(quote) + if not phrases: + continue + verified = any(phrase.lower() in all_text for phrase in phrases) + if not verified: + unverified_quotes.add(quote) + + if not unverified_quotes: + return answer + + # Annotate unverified quotes in the answer + result_lines: list[str] = [] + for line in answer.split("\n"): + result_lines.append(line) + stripped = line.strip() + # Check if this line ends a blockquote that contains an unverified quote + if stripped.startswith(">"): + quote_text = stripped.lstrip("> 
").strip() + for uq in unverified_quotes: + if quote_text and quote_text in uq: + result_lines.append("[unverified]") + break + return "\n".join(result_lines) diff --git a/src/ownscribe/summarization/__init__.py b/src/ownscribe/summarization/__init__.py index e69de29..558b8e2 100644 --- a/src/ownscribe/summarization/__init__.py +++ b/src/ownscribe/summarization/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ownscribe.config import Config + from ownscribe.summarization.base import Summarizer + + +def create_summarizer(config: Config) -> Summarizer: + """Create the appropriate summarizer based on config.""" + if config.summarization.backend == "openai": + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + return OpenAISummarizer(config.summarization, config.templates) + else: + from ownscribe.summarization.ollama_summarizer import OllamaSummarizer + + return OllamaSummarizer(config.summarization, config.templates) diff --git a/src/ownscribe/summarization/base.py b/src/ownscribe/summarization/base.py index db38470..0df2930 100644 --- a/src/ownscribe/summarization/base.py +++ b/src/ownscribe/summarization/base.py @@ -16,6 +16,10 @@ def summarize(self, transcript_text: str) -> str: def generate_title(self, summary_text: str) -> str: """Generate a short meeting title from a summary.""" + @abc.abstractmethod + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + """Send a chat completion request and return the response text.""" + @abc.abstractmethod def is_available(self) -> bool: """Check if the summarization backend is reachable.""" diff --git a/src/ownscribe/summarization/ollama_summarizer.py b/src/ownscribe/summarization/ollama_summarizer.py index 5a108e5..6a90f90 100644 --- a/src/ownscribe/summarization/ollama_summarizer.py +++ b/src/ownscribe/summarization/ollama_summarizer.py @@ -17,6 +17,20 @@ def __init__(self, config: 
SummarizationConfig, templates: dict | None = None) - self._templates = templates or {} self._client = ollama.Client(host=config.host) + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + kwargs = {} + if json_mode: + kwargs["format"] = "json" + response = self._client.chat( + model=self._config.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + **kwargs, + ) + return clean_response(response["message"]["content"]) + def is_available(self) -> bool: try: self._client.list() diff --git a/src/ownscribe/summarization/openai_summarizer.py b/src/ownscribe/summarization/openai_summarizer.py index b000849..2d1128f 100644 --- a/src/ownscribe/summarization/openai_summarizer.py +++ b/src/ownscribe/summarization/openai_summarizer.py @@ -21,6 +21,42 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = None) - base_url = base_url.rstrip("/") + "/v1" self._client = openai.OpenAI(base_url=base_url, api_key="not-needed") + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + formats_to_try: list[dict | None] = [None] + if json_mode: + formats_to_try = [ + {"type": "json_object"}, + {"type": "json_schema", "json_schema": { + "name": "search_results", + "strict": True, + "schema": { + "type": "object", + "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, + "required": ["relevant"], + "additionalProperties": False, + }, + }}, + None, + ] + for fmt in formats_to_try: + try: + kwargs = {} + if fmt is not None: + kwargs["response_format"] = fmt + response = self._client.chat.completions.create( + model=self._config.model, + messages=messages, + **kwargs, + ) + return clean_response(response.choices[0].message.content or "") + except openai.BadRequestError: + continue + return "" + def 
is_available(self) -> bool: try: self._client.models.list() diff --git a/src/ownscribe/summarization/prompts.py b/src/ownscribe/summarization/prompts.py index b2df770..8ad8dd8 100644 --- a/src/ownscribe/summarization/prompts.py +++ b/src/ownscribe/summarization/prompts.py @@ -112,3 +112,38 @@ def resolve_template( def list_templates() -> list[str]: """Return the names of all built-in templates.""" return list(TEMPLATES.keys()) + + +# --- Search prompts --- + +SEARCH_FIND_SYSTEM = ( + "You are a meeting search assistant. Given meeting summaries and a question, " + "identify which meetings are relevant to answering the question. " + "You MUST choose only from the provided meeting IDs. " + "Return at most 5 meetings per request. When in doubt, include the meeting " + "— it is better to include a marginally relevant meeting than to miss one. " + 'Return a JSON object: {"relevant": ["id1", "id2"]}. ' + 'If none are relevant, return {"relevant": []}.' +) + +SEARCH_FIND_PROMPT = """Question: {question} + +Meetings: +{summaries} + +Return ONLY valid JSON: {{"relevant": ["meeting-id-1", "meeting-id-2"]}}""" + +SEARCH_ANSWER_SYSTEM = ( + "You are a meeting assistant. Answer the user's question based on the meeting " + "transcripts provided. Each transcript is prefixed with a header like " + "'## [folder-name] Date Time — Title'. " + "When quoting or referencing information, always cite the meeting it came from " + "by including the meeting display name (e.g., '2026-02-13 15:01 — Quarterly Planning'). " + "Use verbatim text from the transcript for quotes. " + "Include the speaker label and timestamp for each quote. " + "If the answer is not found in the transcripts, say so." 
+) + +SEARCH_ANSWER_PROMPT = """Question: {question} + +{transcripts}""" diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f1bc8df..fa12194 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -181,7 +181,7 @@ def test_transcribe_and_summarize(self, tmp_path): with ( mock.patch("ownscribe.pipeline._create_transcriber", return_value=mock_transcriber), - mock.patch("ownscribe.pipeline._create_summarizer", return_value=mock_summarizer), + mock.patch("ownscribe.pipeline.create_summarizer", return_value=mock_summarizer), ): _do_transcribe_and_summarize(config, audio_path, tmp_path, summarize=True) @@ -206,7 +206,7 @@ def test_summarizer_unavailable_skips_gracefully(self, tmp_path): with ( mock.patch("ownscribe.pipeline._create_transcriber", return_value=mock_transcriber), - mock.patch("ownscribe.pipeline._create_summarizer", return_value=mock_summarizer), + mock.patch("ownscribe.pipeline.create_summarizer", return_value=mock_summarizer), ): _do_transcribe_and_summarize(config, audio_path, tmp_path, summarize=True) diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..ece50d4 --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,605 @@ +"""Tests for the semantic search module.""" + +from __future__ import annotations + +from pathlib import Path + +import click + +from ownscribe.search import ( + _answer_from_transcripts, + _build_summary_chunks, + _discover_meetings, + _estimate_tokens, + _extract_keywords, + _extract_quotes, + _find_relevant_meetings, + _key_phrases, + _keyword_fallback, + _parse_relevant_ids, + _rank_meetings, + _verify_quotes, +) + +# -- Helpers -- + + +def _make_meeting_dir(base: Path, folder_name: str, summary: str, transcript: str | None = None) -> None: + folder = base / folder_name + folder.mkdir(parents=True, exist_ok=True) + (folder / "summary.md").write_text(summary) + if transcript is not None: + (folder / "transcript.md").write_text(transcript) + + +class FakeSummarizer: + 
"""A fake summarizer that returns canned responses.""" + + def __init__(self, responses: list[str] | None = None): + self.calls: list[tuple[str, str, bool]] = [] + self._responses = list(responses or []) + self._call_idx = 0 + + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + self.calls.append((system_prompt, user_prompt, json_mode)) + if self._responses: + resp = self._responses[self._call_idx % len(self._responses)] + self._call_idx += 1 + return resp + return '{"relevant": []}' + + +# -- Discovery tests -- + + +class TestDiscoverMeetings: + def test_discover_meetings(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary of Q1 planning") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Daily standup notes") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 2 + assert skipped == 0 + # newest first + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert meetings[1].folder_name == "2026-02-12_0930_team-standup" + assert "Quarterly Planning" in meetings[0].display_name + assert "2026-02-13 15:01" in meetings[0].display_name + + def test_discover_meetings_no_slug(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-16_1433", "Summary without title") + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary of Q1 planning") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 2 + assert skipped == 0 + # newest first + assert meetings[0].folder_name == "2026-02-16_1433" + assert meetings[0].display_name == "2026-02-16 14:33" + assert meetings[1].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_discover_meetings_since_filter(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + 
_make_meeting_dir(tmp_path, "2026-01-10_1000_old-meeting", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since="2026-02-01", limit=None) + assert len(meetings) == 2 + assert all("2026-02" in m.folder_name for m in meetings) + + def test_discover_meetings_limit(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + _make_meeting_dir(tmp_path, "2026-02-11_1000_old-meeting", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=2) + assert len(meetings) == 2 + # Should be the 2 newest + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert meetings[1].folder_name == "2026-02-12_0930_team-standup" + + def test_discover_meetings_skips_no_summary(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + # Folder with no summary file + no_summary = tmp_path / "2026-02-12_0930_team-standup" + no_summary.mkdir() + (no_summary / "transcript.md").write_text("some transcript") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 1 + assert skipped == 1 + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_discover_meetings_empty_dir(self, tmp_path): + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert meetings == [] + assert skipped == 0 + + def test_discover_meetings_nonexistent_dir(self, tmp_path): + meetings, skipped = _discover_meetings(tmp_path / "nonexistent", since=None, limit=None) + assert meetings == [] + assert skipped == 0 + + +# -- Token estimation -- + + +class TestEstimateTokens: + def test_estimate_tokens(self): + assert _estimate_tokens("a" * 100) == 25 + assert _estimate_tokens("") == 0 + assert _estimate_tokens("hello world") == 2 + + +# -- Chunking -- + + +class TestBuildSummaryChunks: + def test_build_summary_chunks(self, tmp_path): 
+ # Create meetings with summaries of known size + for i in range(5): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "x" * 2000, # 500 tokens each + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Small budget that forces multiple chunks + chunks = _build_summary_chunks(meetings, context_budget=2000) + assert len(chunks) > 1 + # All meetings accounted for + all_ids = {m.folder_name for chunk in chunks for m in chunk} + assert all_ids == {m.folder_name for m in meetings} + + def test_build_summary_chunks_single(self, tmp_path): + for i in range(3): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "Short summary", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Large budget - everything fits in one chunk + chunks = _build_summary_chunks(meetings, context_budget=100000) + assert len(chunks) == 1 + assert len(chunks[0]) == 3 + + +# -- Parse relevant IDs -- + + +class TestParseRelevantIds: + def test_valid_json(self): + assert _parse_relevant_ids('{"relevant": ["id1", "id2"]}') == ["id1", "id2"] + + def test_empty_relevant(self): + assert _parse_relevant_ids('{"relevant": []}') == [] + + def test_json_with_extra_text(self): + response = 'Here are the results: {"relevant": ["id1"]} That is all.' + assert _parse_relevant_ids(response) == ["id1"] + + def test_bare_array(self): + assert _parse_relevant_ids('["id1", "id2"]') == ["id1", "id2"] + + def test_unparseable(self): + assert _parse_relevant_ids("I don't know") is None + + +# -- Keyword extraction -- + + +class TestExtractKeywords: + def test_filters_stop_words(self): + kw = _extract_keywords("What is the budget for the project?") + assert "budget" in kw + assert "project" in kw + assert "the" not in kw + assert "is" not in kw + + def test_strips_punctuation(self): + kw = _extract_keywords("Hello, world! 
How's it going?") + assert "hello" in kw + assert "world" in kw + + def test_all_stop_words_fallback(self): + kw = _extract_keywords("is the a") + # Falls back to returning all words when only stop words remain + assert len(kw) > 0 + assert "is" in kw + + def test_empty_string(self): + kw = _extract_keywords("") + assert kw == set() + + +# -- Keyword fallback -- + + +class TestKeywordFallback: + def test_match_on_summary(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "budget review fiscal quarter") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "standup tasks blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("What about the budget?", meetings) + assert len(matches) == 1 + assert matches[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_match_on_transcript_only(self, tmp_path): + _make_meeting_dir( + tmp_path, "2026-02-13_1501_quarterly-planning", + "Generic meeting notes", # summary has no useful keywords + "Alice discussed the infrastructure migration timeline", # transcript does + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("infrastructure migration", meetings) + assert len(matches) == 1 + assert matches[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_no_match(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "budget review fiscal quarter") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("kubernetes deployment", meetings) + assert len(matches) == 0 + + +# -- Find relevant meetings -- + + +class TestFindRelevantMeetings: + def test_single_batch(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary about deadlines and Q1") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Daily standup notes") + + meetings, _ = 
_discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(['{"relevant": ["2026-02-13_1501_quarterly-planning"]}']) + + result = _find_relevant_meetings(fake, "What about the deadline?", meetings, 100000) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert fake.calls[0][2] is True # json_mode + + def test_drops_unknown_ids(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(['{"relevant": ["2026-02-13_1501_quarterly-planning", "nonexistent-id"]}']) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_multi_batch(self, tmp_path): + for i in range(5): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "x" * 2000, + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Meetings are newest-first, each gets its own batch: + # batch 0=meeting-4, 1=meeting-3, 2=meeting-2, 3=meeting-1, 4=meeting-0 + responses = [ + '{"relevant": ["2026-02-14_1000_meeting-4"]}', + '{"relevant": []}', + '{"relevant": []}', + '{"relevant": ["2026-02-11_1000_meeting-1"]}', + '{"relevant": []}', + ] + fake = FakeSummarizer(responses) + + result = _find_relevant_meetings(fake, "question", meetings, context_size=2000) + result_ids = {m.folder_name for m in result} + assert "2026-02-14_1000_meeting-4" in result_ids + assert "2026-02-11_1000_meeting-1" in result_ids + + def test_json_fallback_regex(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Malformed but extractable JSON + fake = FakeSummarizer(['Sure! 
{"relevant": ["2026-02-13_1501_quarterly-planning"]}']) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + assert len(result) == 1 + + def test_total_fallback(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Completely unparseable + fake = FakeSummarizer(["I have no idea what to return"]) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + # All meetings should be included as fallback + assert len(result) == 2 + + +class TestFindRelevantMeetingsKeywordFallback: + def test_llm_empty_falls_back_to_keywords(self, tmp_path): + """When the LLM returns no relevant IDs, keyword fallback finds the meeting.""" + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Generic meeting summary", # summary is too vague for LLM + "Alice discussed the infrastructure migration timeline in detail", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # LLM returns empty relevant list + fake = FakeSummarizer(['{"relevant": []}']) + + result = _find_relevant_meetings( + fake, "infrastructure migration", meetings, 100000, + ) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + + +# -- Ranking -- + + +class TestRankMeetings: + def test_keyword_overlap(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_budget-review", "budget review fiscal quarter spending") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "standup tasks blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + ranked = _rank_meetings("What was discussed about the budget?", meetings) + assert ranked[0].folder_name == "2026-02-13_1501_budget-review" + + def test_speaker_boost(self, tmp_path): + _make_meeting_dir(tmp_path, 
"2026-02-13_1501_planning", "Bob discussed the timeline") + _make_meeting_dir(tmp_path, "2026-02-12_0930_standup", "Anna mentioned the deadline and blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + ranked = _rank_meetings("What did Anna say about the deadline?", meetings) + assert ranked[0].folder_name == "2026-02-12_0930_standup" + + +# -- Answer from transcripts -- + + +class TestAnswerFromTranscripts: + def test_answer(self, tmp_path): + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Summary", + "Alice: The deadline is March 15th.\nBob: Got it.", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(["Anna said the deadline is March 15th."]) + + answer, skipped = _answer_from_transcripts(fake, "What about the deadline?", meetings, 100000) + assert "March 15th" in answer + assert skipped == 0 + + def test_budget_overflow(self, tmp_path): + # Create a meeting with a very large transcript + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_big-meeting", + "Summary", + "x" * 100000, + ) + _make_meeting_dir( + tmp_path, + "2026-02-12_0930_small-meeting", + "Summary", + "Alice: Short transcript.", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(["Answer based on available transcripts."]) + + # Tiny budget: only the small one should fit + answer, skipped = _answer_from_transcripts(fake, "question", meetings, context_size=2000) + # The big one should be skipped, function should still return an answer + assert "Answer based on available transcripts." 
in answer + assert skipped > 0 + + +# -- Quote verification -- + + +class TestVerifyQuotes: + def test_phrase_match(self): + answer = '> The deadline for Q1 deliverables is March 15th but I think we should aim for March 10th' + transcripts = { + "meeting-1": ( + "The deadline for Q1 deliverables is March 15th but I think" + " we should aim for March 10th to have buffer time." + ), + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" not in result + + def test_not_found(self): + answer = '> The completely fabricated quote that does not exist in any transcript at all' + transcripts = { + "meeting-1": "Alice: Let's discuss the budget.\nBob: Sure, sounds good.", + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" in result + + def test_empty_transcripts(self): + answer = "> Some quote here" + result = _verify_quotes(answer, {}) + assert result == answer + + +class TestExtractQuotes: + def test_blockquotes(self): + text = "Text before\n> This is a quote\n> continued here\nText after" + quotes = _extract_quotes(text) + assert any("This is a quote" in q for q in quotes) + + def test_inline_quotes(self): + text = 'She said "The deadline is definitely March 15th for all deliverables" in the meeting.' + quotes = _extract_quotes(text) + assert any("deadline" in q for q in quotes) + + +class TestKeyPhrases: + def test_short_quote(self): + phrases = _key_phrases("too short") + assert phrases == [] + + def test_medium_quote(self): + phrases = _key_phrases("one two three four five six seven eight nine ten") + assert len(phrases) >= 1 + assert all(len(p.split()) >= 8 for p in phrases) + + +# -- Integration test -- + + +class TestAskIntegration: + def test_end_to_end(self, httpserver, tmp_path, monkeypatch): + # Set up mock meetings + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Discussed Q1 deadlines. 
Anna mentioned March 15th target.", + "[00:01:00] Anna: The deadline is March 15th.\n[00:01:30] Bob: Acknowledged.", + ) + _make_meeting_dir( + tmp_path, + "2026-02-12_0930_team-standup", + "Daily standup. Discussed blockers.", + "[00:00:30] Charlie: No updates.\n[00:01:00] Dave: Still blocked.", + ) + + # Stage 1 response (find) + find_response = { + "message": { + "role": "assistant", + "content": '{"relevant": ["2026-02-13_1501_quarterly-planning"]}', + }, + "done": True, + } + # Stage 2 response (answer) + answer_response = { + "message": { + "role": "assistant", + "content": 'Anna said:\n> The deadline is March 15th.\n\nThis was in the Quarterly Planning meeting.', + }, + "done": True, + } + # Auto-detect context size calls /api/show + show_response = { + "model_info": {"general.context_length": 8192}, + } + httpserver.expect_ordered_request("/api/show", method="POST").respond_with_json(show_response) + httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(find_response) + httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(answer_response) + + from ownscribe.config import Config + + config = Config() + config.output.dir = str(tmp_path) + config.summarization.host = httpserver.url_for("") + config.summarization.backend = "ollama" + config.summarization.model = "test-model" + + from ownscribe.search import ask + + output_lines: list[str] = [] + monkeypatch.setattr(click, "echo", lambda msg="": output_lines.append(str(msg))) + + ask(config, "What did Anna say about the deadline?", since=None, limit=None) + + output = "\n".join(output_lines) + assert "Found 1 relevant" in output + assert "Quarterly Planning" in output + assert "March 15th" in output + + +# -- OpenAI json_mode fallback tests -- + + +def _openai_ok_response(content: str) -> dict: + return { + "id": "chatcmpl-test", + "object": "chat.completion", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": content}, + 
"finish_reason": "stop", + } + ], + } + + +def _openai_400_response() -> tuple[dict, int]: + return ( + {"error": { + "message": "'response_format.type' must be 'json_schema' or 'text'", + "type": "invalid_request_error", + }}, + 400, + ) + + +class TestOpenAIChatJsonModeFallback: + def test_openai_chat_json_mode_fallback(self, httpserver): + """json_object and json_schema both fail → falls back to no response_format.""" + from ownscribe.config import SummarizationConfig + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + body_400, status_400 = _openai_400_response() + ep = "/v1/chat/completions" + # 1st attempt (json_object) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 2nd attempt (json_schema) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 3rd attempt (no response_format) → 200 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + _openai_ok_response('{"relevant": ["meeting-1"]}'), + ) + + cfg = SummarizationConfig() + cfg.host = httpserver.url_for("") + cfg.model = "test-model" + summarizer = OpenAISummarizer(cfg) + + result = summarizer.chat("system", "user", json_mode=True) + assert '"relevant"' in result or "meeting-1" in result + + def test_openai_chat_json_schema_fallback(self, httpserver): + """json_object fails → falls back to json_schema which succeeds.""" + from ownscribe.config import SummarizationConfig + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + body_400, status_400 = _openai_400_response() + ep = "/v1/chat/completions" + # 1st attempt (json_object) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 2nd attempt (json_schema) → 200 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + _openai_ok_response('{"relevant": ["meeting-2"]}'), + ) + 
+ cfg = SummarizationConfig() + cfg.host = httpserver.url_for("") + cfg.model = "test-model" + summarizer = OpenAISummarizer(cfg) + + result = summarizer.chat("system", "user", json_mode=True) + assert "meeting-2" in result From 77adb795fd0dc0b2489230461f677bf3abbc0127 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:54:59 +0100 Subject: [PATCH 2/5] Improve prompts for `ask` feature to give more consistent answers --- src/ownscribe/summarization/prompts.py | 37 +++++++++++++++++++------- tests/test_search.py | 6 ++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/ownscribe/summarization/prompts.py b/src/ownscribe/summarization/prompts.py index 8ad8dd8..e6ff138 100644 --- a/src/ownscribe/summarization/prompts.py +++ b/src/ownscribe/summarization/prompts.py @@ -134,16 +134,35 @@ def list_templates() -> list[str]: Return ONLY valid JSON: {{"relevant": ["meeting-id-1", "meeting-id-2"]}}""" SEARCH_ANSWER_SYSTEM = ( - "You are a meeting assistant. Answer the user's question based on the meeting " - "transcripts provided. Each transcript is prefixed with a header like " - "'## [folder-name] Date Time — Title'. " - "When quoting or referencing information, always cite the meeting it came from " - "by including the meeting display name (e.g., '2026-02-13 15:01 — Quarterly Planning'). " - "Use verbatim text from the transcript for quotes. " - "Include the speaker label and timestamp for each quote. " - "If the answer is not found in the transcripts, say so." + "You are a meeting assistant. Answer the user's question based on the " + "meeting transcripts provided. Each transcript is prefixed with a header " + "like '## [folder-name] Date Time — Title'.\n" + "\n" + "Always use this exact format:\n" + "\n" + "1. Start with a 1-2 sentence summary answering the question.\n" + "2. 
Then list supporting quotes grouped by meeting:\n" + "\n" + "**Meeting display name**\n" + "- **SPEAKER** [timestamp]: \"Verbatim quote from the transcript.\"\n" + "\n" + "Example:\n" + "\n" + "The deadline was set for March 15th.\n" + "\n" + "**2026-02-13 15:01 — Quarterly Planning**\n" + "- **SPEAKER_03** [00:13]: \"So the deadline for Q1 deliverables is March 15th.\"\n" + "- **SPEAKER_01** [00:21]: \"They will hear from us by Friday.\"\n" + "\n" + "Rules:\n" + "- Use the meeting display name from the transcript header (after the folder name in brackets).\n" + "- Copy quotes verbatim from the transcript. Do not paraphrase.\n" + "- If the answer is not found in the transcripts, say so.\n" + "- Keep the answer concise." ) SEARCH_ANSWER_PROMPT = """Question: {question} -{transcripts}""" +{transcripts} + +Answer using the format from your instructions (summary, then quoted evidence grouped by meeting).""" diff --git a/tests/test_search.py b/tests/test_search.py index ece50d4..9f4b1a1 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -491,7 +491,11 @@ def test_end_to_end(self, httpserver, tmp_path, monkeypatch): answer_response = { "message": { "role": "assistant", - "content": 'Anna said:\n> The deadline is March 15th.\n\nThis was in the Quarterly Planning meeting.', + "content": ( + 'The deadline is March 15th.\n\n' + '**2026-02-13 15:01 — Quarterly Planning**\n' + '- **Anna** [00:01:00]: "The deadline is March 15th."' + ), }, "done": True, } From e4dcb7752e1c5d98c6948c0a2ed0cd01addfc4a5 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:55:15 +0100 Subject: [PATCH 3/5] Help AI agents with AGENTS.md file --- AGENTS.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..870258e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,50 @@ +# AGENTS.md + +This file provides guidance to Claude Code (claude.ai/code)
when working with code in this repository. + +## Commands + +```bash +uv run pytest # run all tests +uv run pytest -v # verbose +uv run pytest -v -k test_search # run a specific test module +uv run pytest -v -k "TestRankMeetings::test_speaker_boost" # single test +uv run ruff check src/ tests/ # lint +uv run ruff format src/ tests/ # auto-format +``` + +## Architecture + +**ownscribe** is a CLI tool for local meeting recording, transcription, and summarization. The main pipeline is: Record → Transcribe → Summarize → Output. + +### Plugin systems with abstract base classes + +Each stage has a base class in its subpackage and one or more implementations: + +- **Audio** (`audio/base.py`): `CoreAudioRecorder` (macOS, wraps a Swift binary in `swift/`) and `SoundDeviceRecorder` (cross-platform fallback). Selected in `pipeline.py:_create_recorder()`. +- **Transcription** (`transcription/base.py`): `WhisperXTranscriber` (single impl). Data models (`Segment`, `Word`, `TranscriptResult`) live in `transcription/models.py`. +- **Summarization** (`summarization/base.py`): `OllamaSummarizer` and `OpenAISummarizer`. Factory is `summarization/__init__.py:create_summarizer()` — used by both `pipeline.py` and `search.py`. +- **Output** (`output/`): `markdown.py` and `json_output.py`, selected by `config.output.format`. + +### Key modules + +- **`cli.py`** — Click command group. Entry point: `ownscribe.cli:cli`. All subcommands (`ask`, `transcribe`, `summarize`, `devices`, `apps`, `config`, `cleanup`). +- **`pipeline.py`** — Orchestrates the record → transcribe → summarize flow. Creates timestamped output dirs (`~/ownscribe/YYYY-MM-DD_HHMM_slug/`). +- **`search.py`** — Two-stage LLM search over meeting notes. Stage 1 scores summaries for relevance, stage 2 synthesizes answers from full transcripts. Has keyword fallback and quote verification. Helper functions return data; only `ask()` calls `click.echo`. 
+- **`config.py`** — Dataclass hierarchy (`Config` → `AudioConfig`, `TranscriptionConfig`, `SummarizationConfig`, etc.). Loaded from `~/.config/ownscribe/config.toml` with env var overrides (`HF_TOKEN`, `OLLAMA_HOST`). +- **`summarization/prompts.py`** — Built-in prompt templates (meeting, lecture, brief) plus search prompts. Users can define custom templates in config TOML. + +### Testing conventions + +- Uses `pytest` with `pytest-httpserver` for mocking HTTP APIs (Ollama, OpenAI). +- Shared fixtures in `conftest.py`: `sample_transcript`, `diarized_transcript`, `synthetic_wav`. +- Tests use `FakeSummarizer` (in `test_search.py`) or `unittest.mock` for pipeline tests. +- Markers: `@pytest.mark.hardware` (auto-skipped in CI), `@pytest.mark.macos` (auto-skipped on non-macOS). +- When mocking the shared summarizer factory in pipeline tests, patch `ownscribe.pipeline.create_summarizer` (it's imported at module level). + +## Style + +- Python 3.12+. Ruff with line-length 120. +- `from __future__ import annotations` in all modules. +- Lazy imports for heavy dependencies (whisperx, ollama, openai) — imported inside functions, not at module level. +- Helper functions return data; orchestrator functions (`ask()`, `run_pipeline()`) handle all `click.echo` output. 
From c8d368351839a0935dae91362d2d592b616e0b88 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 15:08:17 +0100 Subject: [PATCH 4/5] Address GitHub's Copilot comments --- src/ownscribe/search.py | 28 ++++++++++++++++++++++------ tests/test_search.py | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py index 33e29fd..2393d43 100644 --- a/src/ownscribe/search.py +++ b/src/ownscribe/search.py @@ -43,7 +43,15 @@ def ask(config: Config, question: str, since: str | None, limit: int | None) -> if skipped: click.echo(f"({skipped} meetings without summaries were skipped)") + if not config.summarization.enabled: + click.echo("Summarization must be enabled to use ask. Check your configuration.") + return + summarizer = create_summarizer(config) + if not summarizer.is_available(): + click.echo("Summarization backend is not reachable. Check your configuration.") + return + context_size = _resolve_context_size(config) # Stage 1 @@ -353,7 +361,7 @@ def _find_relevant_meetings( spinner.update("Falling back to keyword search") return _rank_meetings(question, keyword_matches) - # Build result list preserving order, then rank + # Build result list from unique IDs, then rank meeting_by_id = {m.folder_name: m for m in meetings} relevant = [meeting_by_id[id_] for id_ in all_relevant_ids if id_ in meeting_by_id] return _rank_meetings(question, relevant) @@ -410,7 +418,6 @@ def _answer_from_transcripts( transcript_parts: list[str] = [] used_tokens = 0 - included = 0 skipped = 0 for m in meetings: @@ -428,7 +435,6 @@ def _answer_from_transcripts( transcript_parts.append(entry) used_tokens += entry_tokens - included += 1 if not transcript_parts: return "No transcript text available for the relevant meetings.", skipped @@ -469,7 +475,7 @@ def _key_phrases(quote: str, min_words: int = 8, max_words: int = 12) -> list[st """Extract key phrases (8-12 word spans) from a quote for 
verification.""" words = quote.split() if len(words) <= max_words: - return [" ".join(words)] if len(words) >= 4 else [] + return [" ".join(words)] if len(words) >= min_words else [] phrases = [] # Take a few spans spread across the quote @@ -505,13 +511,23 @@ def _verify_quotes(answer: str, transcripts: dict[str, str]) -> str: # Annotate unverified quotes in the answer result_lines: list[str] = [] for line in answer.split("\n"): - result_lines.append(line) stripped = line.strip() - # Check if this line ends a blockquote that contains an unverified quote + annotated = False + # Blockquote annotation if stripped.startswith(">"): quote_text = stripped.lstrip("> ").strip() for uq in unverified_quotes: if quote_text and quote_text in uq: + result_lines.append(line) result_lines.append("[unverified]") + annotated = True break + # Inline quote annotation: mark "..." segments that are unverified + if not annotated: + modified = line + for uq in unverified_quotes: + target = f'"{uq}"' + if target in modified: + modified = modified.replace(target, f'{target} [unverified]') + result_lines.append(modified) return "\n".join(result_lines) diff --git a/tests/test_search.py b/tests/test_search.py index 9f4b1a1..fa033b6 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -431,6 +431,31 @@ def test_not_found(self): result = _verify_quotes(answer, transcripts) assert "[unverified]" in result + def test_inline_quote_not_found(self): + answer = ( + 'Anna said "The completely fabricated quote that does not' + ' exist in any transcript at all" in the meeting.' + ) + transcripts = { + "meeting-1": "Alice: Let's discuss the budget.\nBob: Sure, sounds good.", + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" in result + + def test_inline_quote_verified(self): + answer = ( + 'Anna said "The deadline for Q1 deliverables is March 15th' + ' but I think we should aim for March 10th" in the meeting.' 
+ ) + transcripts = { + "meeting-1": ( + "The deadline for Q1 deliverables is March 15th but I think" + " we should aim for March 10th to have buffer time." + ), + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" not in result + def test_empty_transcripts(self): answer = "> Some quote here" result = _verify_quotes(answer, {}) @@ -503,6 +528,7 @@ def test_end_to_end(self, httpserver, tmp_path, monkeypatch): show_response = { "model_info": {"general.context_length": 8192}, } + httpserver.expect_ordered_request("/api/tags", method="GET").respond_with_json({"models": []}) httpserver.expect_ordered_request("/api/show", method="POST").respond_with_json(show_response) httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(find_response) httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(answer_response) From b809eccc28fe66c7413f7e7bb7f176f9e4bd0e93 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 15:20:37 +0100 Subject: [PATCH 5/5] Keep `summarizer.chat` generic, move JSON schema into caller --- src/ownscribe/search.py | 13 ++++++++- src/ownscribe/summarization/base.py | 5 +++- .../summarization/ollama_summarizer.py | 5 +++- .../summarization/openai_summarizer.py | 27 +++++++++---------- tests/test_search.py | 11 +++++--- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py index 2393d43..4135f4b 100644 --- a/src/ownscribe/search.py +++ b/src/ownscribe/search.py @@ -23,6 +23,17 @@ _DEFAULT_CONTEXT_SIZE = 8192 +_SEARCH_RESULTS_SCHEMA = { + "name": "search_results", + "strict": True, + "schema": { + "type": "object", + "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, + "required": ["relevant"], + "additionalProperties": False, + }, +} + class Meeting(NamedTuple): folder_name: str @@ -344,7 +355,7 @@ def _find_relevant_meetings( summaries = "\n\n".join(summaries_parts) prompt = 
SEARCH_FIND_PROMPT.format(question=question, summaries=summaries) - response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True) + response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True, json_schema=_SEARCH_RESULTS_SCHEMA) ids = _parse_relevant_ids(response) if ids is None: diff --git a/src/ownscribe/summarization/base.py b/src/ownscribe/summarization/base.py index 0df2930..0858645 100644 --- a/src/ownscribe/summarization/base.py +++ b/src/ownscribe/summarization/base.py @@ -17,7 +17,10 @@ def generate_title(self, summary_text: str) -> str: """Generate a short meeting title from a summary.""" @abc.abstractmethod - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: """Send a chat completion request and return the response text.""" @abc.abstractmethod diff --git a/src/ownscribe/summarization/ollama_summarizer.py b/src/ownscribe/summarization/ollama_summarizer.py index 6a90f90..0b7e860 100644 --- a/src/ownscribe/summarization/ollama_summarizer.py +++ b/src/ownscribe/summarization/ollama_summarizer.py @@ -17,7 +17,10 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = None) - self._templates = templates or {} self._client = ollama.Client(host=config.host) - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: kwargs = {} if json_mode: kwargs["format"] = "json" diff --git a/src/ownscribe/summarization/openai_summarizer.py b/src/ownscribe/summarization/openai_summarizer.py index 2d1128f..9bf771b 100644 --- a/src/ownscribe/summarization/openai_summarizer.py +++ b/src/ownscribe/summarization/openai_summarizer.py @@ -21,27 +21,24 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = 
None) - base_url = base_url.rstrip("/") + "/v1" self._client = openai.OpenAI(base_url=base_url, api_key="not-needed") - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] formats_to_try: list[dict | None] = [None] if json_mode: - formats_to_try = [ - {"type": "json_object"}, - {"type": "json_schema", "json_schema": { - "name": "search_results", - "strict": True, - "schema": { - "type": "object", - "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, - "required": ["relevant"], - "additionalProperties": False, - }, - }}, - None, - ] + if json_schema is not None: + formats_to_try = [ + {"type": "json_object"}, + {"type": "json_schema", "json_schema": json_schema}, + None, + ] + else: + formats_to_try = [{"type": "json_object"}, None] for fmt in formats_to_try: try: kwargs = {} diff --git a/tests/test_search.py b/tests/test_search.py index fa033b6..93426d9 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -40,7 +40,10 @@ def __init__(self, responses: list[str] | None = None): self._responses = list(responses or []) self._call_idx = 0 - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: self.calls.append((system_prompt, user_prompt, json_mode)) if self._responses: resp = self._responses[self._call_idx % len(self._responses)] @@ -607,7 +610,8 @@ def test_openai_chat_json_mode_fallback(self, httpserver): cfg.model = "test-model" summarizer = OpenAISummarizer(cfg) - result = summarizer.chat("system", "user", json_mode=True) + schema = {"name": "test", "strict": True, "schema": {"type": "object"}} + result = 
summarizer.chat("system", "user", json_mode=True, json_schema=schema) assert '"relevant"' in result or "meeting-1" in result def test_openai_chat_json_schema_fallback(self, httpserver): @@ -631,5 +635,6 @@ def test_openai_chat_json_schema_fallback(self, httpserver): cfg.model = "test-model" summarizer = OpenAISummarizer(cfg) - result = summarizer.chat("system", "user", json_mode=True) + schema = {"name": "test", "strict": True, "schema": {"type": "object"}} + result = summarizer.chat("system", "user", json_mode=True, json_schema=schema) assert "meeting-2" in result