From ee2f6dcb10272ea1c55a8a1fc5108e47c0e36212 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:28:26 +0100 Subject: [PATCH 1/5] Add `ask` command for natural-language search across meeting notes Two-stage LLM pipeline: first identifies relevant meetings from summaries, then answers from full transcripts with quote verification. Includes spinner UX, meeting source citation in answers, keyword fallback, and context_size config option. --- README.md | 19 + src/ownscribe/cli.py | 13 + src/ownscribe/config.py | 2 + src/ownscribe/pipeline.py | 14 +- src/ownscribe/search.py | 517 +++++++++++++++ src/ownscribe/summarization/__init__.py | 19 + src/ownscribe/summarization/base.py | 4 + .../summarization/ollama_summarizer.py | 14 + .../summarization/openai_summarizer.py | 36 ++ src/ownscribe/summarization/prompts.py | 35 + tests/test_pipeline.py | 4 +- tests/test_search.py | 605 ++++++++++++++++++ 12 files changed, 1269 insertions(+), 13 deletions(-) create mode 100644 src/ownscribe/search.py create mode 100644 tests/test_search.py diff --git a/README.md b/README.md index fcc5c3b..48f3269 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ All audio, transcripts, and summaries remain local. 
- **Pipeline progress** — live checklist showing transcription, diarization sub-steps, and summarization progress - **Local LLM summarization** — structured meeting notes via Ollama, LM Studio, or any OpenAI-compatible server - **Summarization templates** — built-in presets for meetings, lectures, and quick briefs; define your own in config +- **Ask your meetings** — ask natural-language questions across all your meeting notes; uses a two-stage LLM pipeline with keyword fallback - **One command** — just run `ownscribe`, press Ctrl+C when done, get transcript + summary ## Requirements @@ -118,10 +119,27 @@ ownscribe devices # list audio devices (uses native CoreAudio w ownscribe apps # list running apps with PIDs for use with --pid ownscribe transcribe recording.wav # transcribe an existing audio file ownscribe summarize transcript.md # summarize an existing transcript +ownscribe ask "question" # search your meetings with a natural-language question ownscribe config # open config file in $EDITOR ownscribe cleanup # remove ownscribe data from disk ``` +### Searching Meeting Notes + +Use `ask` to search across all your meeting notes with natural-language questions: + +```bash +ownscribe ask "What did Anna say about the deadline?" +ownscribe ask "budget decisions" --since 2026-01-01 +ownscribe ask "action items from last week" --limit 5 +``` + +This runs a two-stage pipeline: +1. **Find** — sends meeting summaries to the LLM to identify which meetings are relevant +2. **Answer** — sends the full transcripts of relevant meetings to the LLM to produce an answer with quotes + +If the LLM finds no relevant meetings, a keyword fallback searches summaries and transcripts directly. + ## Configuration Config is stored at `~/.config/ownscribe/config.toml`. Run `ownscribe config` to create and edit it. 
@@ -149,6 +167,7 @@ backend = "ollama" # "ollama" or "openai" model = "mistral" host = "http://localhost:11434" # template = "meeting" # "meeting", "lecture", "brief", or a custom name +# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends # Custom templates (optional): # [templates.my-standup] diff --git a/src/ownscribe/cli.py b/src/ownscribe/cli.py index 899f3f8..8fc6a2c 100644 --- a/src/ownscribe/cli.py +++ b/src/ownscribe/cli.py @@ -97,6 +97,19 @@ def cli( run_pipeline(config) +@cli.command() +@click.argument("question") +@click.option("--since", default=None, help="Only search meetings after this date (YYYY-MM-DD).") +@click.option("--limit", default=None, type=int, help="Max number of recent meetings to search.") +@click.pass_context +def ask(ctx: click.Context, question: str, since: str | None, limit: int | None) -> None: + """Ask a question across your meeting notes.""" + config = ctx.obj["config"] + from ownscribe.search import ask as run_ask + + run_ask(config, question, since=since, limit=limit) + + @cli.command() def devices() -> None: """List available audio input devices.""" diff --git a/src/ownscribe/config.py b/src/ownscribe/config.py index ba9df2a..4ca4c50 100644 --- a/src/ownscribe/config.py +++ b/src/ownscribe/config.py @@ -35,6 +35,7 @@ model = "mistral" # model name host = "http://localhost:11434" # ollama: :11434, LM Studio: :1234 # template = "meeting" # built-in: "meeting", "lecture", or "brief" +# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends # Custom templates (optional): # [templates.my-notes] @@ -79,6 +80,7 @@ class SummarizationConfig: model: str = "mistral" host: str = "http://localhost:11434" template: str = "" + context_size: int = 0 @dataclass diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index aaaefc6..5664d41 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -17,6 +17,7 @@ from ownscribe.config import 
Config from ownscribe.progress import PipelineProgress, Spinner +from ownscribe.summarization import create_summarizer # A standard WAV file header (RIFF + fmt + data chunk header) is 44 bytes. # Files at or below this size contain no audio frames. @@ -87,15 +88,6 @@ def _create_transcriber(config: Config, progress=None): return WhisperXTranscriber(config.transcription, diar_config, progress=progress) -def _create_summarizer(config: Config): - """Create the appropriate summarizer based on config.""" - if config.summarization.backend == "openai": - from ownscribe.summarization.openai_summarizer import OpenAISummarizer - return OpenAISummarizer(config.summarization, config.templates) - else: - from ownscribe.summarization.ollama_summarizer import OllamaSummarizer - return OllamaSummarizer(config.summarization, config.templates) - def _format_output(config: Config, transcript_result, summary_text: str | None = None) -> tuple[str, str | None]: """Format transcript and optional summary. Returns (transcript_str, summary_str).""" @@ -233,7 +225,7 @@ def run_summarize(config: Config, transcript_file: str) -> None: """Summarize a transcript file.""" transcript_text = Path(transcript_file).read_text() - summarizer = _create_summarizer(config) + summarizer = create_summarizer(config) if not summarizer.is_available(): click.echo( f"Error: {config.summarization.backend} is not reachable at {config.summarization.host}. " @@ -295,7 +287,7 @@ def _do_transcribe_and_summarize( # 3. 
Summarize if sum_enabled: - summarizer = _create_summarizer(config) + summarizer = create_summarizer(config) if not summarizer.is_available(): click.echo( f"\nWarning: {config.summarization.backend} is not reachable " diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py new file mode 100644 index 0000000..33e29fd --- /dev/null +++ b/src/ownscribe/search.py @@ -0,0 +1,517 @@ +"""Semantic search over meeting notes using a two-stage LLM pipeline.""" + +from __future__ import annotations + +import json +import re +from datetime import date +from pathlib import Path +from typing import NamedTuple + +import click + +from ownscribe.config import Config +from ownscribe.progress import Spinner +from ownscribe.summarization import create_summarizer +from ownscribe.summarization.base import Summarizer +from ownscribe.summarization.prompts import ( + SEARCH_ANSWER_PROMPT, + SEARCH_ANSWER_SYSTEM, + SEARCH_FIND_PROMPT, + SEARCH_FIND_SYSTEM, +) + +_DEFAULT_CONTEXT_SIZE = 8192 + + +class Meeting(NamedTuple): + folder_name: str + display_name: str + summary_path: Path + transcript_path: Path | None + + +def ask(config: Config, question: str, since: str | None, limit: int | None) -> None: + """Run a two-stage search over meeting notes and print the answer.""" + output_dir = config.output.resolved_dir + meetings, skipped = _discover_meetings(output_dir, since=since, limit=limit) + + if not meetings: + click.echo("No meetings found.") + return + + if skipped: + click.echo(f"({skipped} meetings without summaries were skipped)") + + summarizer = create_summarizer(config) + context_size = _resolve_context_size(config) + + # Stage 1 + label = f"Searching {len(meetings)} meetings" + with Spinner(label) as spinner: + relevant = _find_relevant_meetings( + summarizer, question, meetings, context_size, spinner=spinner, + ) + spinner.update(label) # restore label so exit message is clean + + if not relevant: + click.echo("No relevant meetings found for your question.") + return + + 
click.echo(f"Found {len(relevant)} relevant meetings:") + for m in relevant: + click.echo(f" - {m.display_name}") + + # Stage 2 + with Spinner("Analyzing transcripts"): + answer, skipped_transcripts = _answer_from_transcripts(summarizer, question, relevant, context_size) + answer = _verify_quotes(answer, _load_transcripts(relevant)) + + if skipped_transcripts: + click.echo(f"({skipped_transcripts} transcripts did not fit within context budget, they were skipped)") + + click.echo(answer) + + + +def _resolve_context_size(config: Config) -> int: + if config.summarization.context_size > 0: + return config.summarization.context_size + + if config.summarization.backend == "ollama": + try: + import ollama + + client = ollama.Client(host=config.summarization.host) + info = client.show(config.summarization.model) + # Ollama returns model info with context window details + model_info = info.get("model_info", {}) + for key, value in model_info.items(): + if "context_length" in key: + return int(value) + except Exception: + pass + + return _DEFAULT_CONTEXT_SIZE + + +# -- Discovery -- + + +_FOLDER_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})_(\d{2})(\d{2})(?:_(.+))?$") + + +def _parse_folder_name(name: str) -> tuple[str, str] | None: + """Parse a folder name like '2026-02-13_1501_quarterly-planning'. + + Also accepts folders without a slug, e.g. '2026-02-16_1433'. + Returns (date_str, display_name) or None if the name doesn't match. 
+ """ + m = _FOLDER_RE.match(name) + if not m: + return None + date_str = m.group(1) + hour, minute = m.group(2), m.group(3) + slug = m.group(4) + if slug: + title = slug.replace("-", " ").title() + return date_str, f"{date_str} {hour}:{minute} — {title}" + return date_str, f"{date_str} {hour}:{minute}" + + +def _discover_meetings( + output_dir: Path, since: str | None, limit: int | None, +) -> tuple[list[Meeting], int]: + if not output_dir.exists(): + return [], 0 + + meetings: list[Meeting] = [] + skipped = 0 + + for folder in sorted(output_dir.iterdir(), reverse=True): + if not folder.is_dir(): + continue + parsed = _parse_folder_name(folder.name) + if parsed is None: + continue + + date_str, display_name = parsed + + # Find summary file + summary_path = None + for ext in ("summary.md", "summary.json"): + candidate = folder / ext + if candidate.exists(): + summary_path = candidate + break + + if summary_path is None: + skipped += 1 + continue + + # Find transcript file + transcript_path = None + for ext in ("transcript.md", "transcript.json"): + candidate = folder / ext + if candidate.exists(): + transcript_path = candidate + break + + # Apply --since filter + if since: + try: + since_date = date.fromisoformat(since) + meeting_date = date.fromisoformat(date_str) + if meeting_date < since_date: + continue + except ValueError: + pass + + meetings.append(Meeting(folder.name, display_name, summary_path, transcript_path)) + + # Apply --limit cap (meetings are already newest-first) + if limit is not None and limit > 0: + meetings = meetings[:limit] + + return meetings, skipped + + +# -- Token estimation -- + + +def _estimate_tokens(text: str) -> int: + return len(text) // 4 + + +# -- Chunking -- + + +def _build_summary_chunks( + meetings: list[Meeting], context_budget: int, +) -> list[list[Meeting]]: + effective = int(context_budget * 0.8) + overhead = 1000 # system prompt + question + response headroom + budget = max(effective - overhead, 500) + + chunks: 
list[list[Meeting]] = [] + current_chunk: list[Meeting] = [] + current_size = 0 + + for m in meetings: + summary_text = m.summary_path.read_text() + header = f"## [{m.folder_name}]\n" + entry_tokens = _estimate_tokens(header + summary_text) + + if current_chunk and current_size + entry_tokens > budget: + chunks.append(current_chunk) + current_chunk = [] + current_size = 0 + + current_chunk.append(m) + current_size += entry_tokens + + if current_chunk: + chunks.append(current_chunk) + + return chunks + + +# -- Stage 1: Find relevant meetings -- + + +_JSON_RE = re.compile(r'\{[^{}]*"relevant"[^{}]*\}', re.DOTALL) +_ARRAY_RE = re.compile(r'\[.*?\]', re.DOTALL) + + +def _parse_relevant_ids(response: str) -> list[str] | None: + """Extract meeting IDs from a Stage 1 LLM response. + + Returns a list of IDs, or None if parsing fails entirely. + """ + # Try direct JSON parse + try: + data = json.loads(response) + if isinstance(data, dict) and "relevant" in data: + ids = data["relevant"] + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + # Fallback: regex for JSON object with "relevant" + m = _JSON_RE.search(response) + if m: + try: + data = json.loads(m.group()) + if isinstance(data, dict) and "relevant" in data: + ids = data["relevant"] + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + # Fallback: bare array + m = _ARRAY_RE.search(response) + if m: + try: + ids = json.loads(m.group()) + if isinstance(ids, list): + return [str(i) for i in ids] + except (json.JSONDecodeError, TypeError): + pass + + return None + + +_STOP_WORDS = frozenset([ + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "shall", + "should", "may", "might", "can", "could", "of", "in", "to", "for", "on", + "with", "at", "by", "from", "about", "into", "through", "during", "before", + "after", "above", 
"below", "between", "out", "off", "over", "under", + "again", "further", "then", "once", "here", "there", "when", "where", + "why", "how", "all", "each", "every", "both", "few", "more", "most", + "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", + "than", "too", "very", "and", "but", "or", "if", "what", "which", "who", + "whom", "this", "that", "these", "those", "i", "me", "my", "myself", "we", + "our", "ours", "ourselves", "you", "your", "yours", "yourself", + "yourselves", "he", "him", "his", "himself", "she", "her", "hers", + "herself", "it", "its", "itself", "they", "them", "their", "theirs", + "themselves", "am", "s", "t", "d", "ll", "ve", "re", +]) + + +def _extract_keywords(text: str) -> set[str]: + """Extract lowercase keywords from *text*, filtering stop words.""" + words = set(re.findall(r"[a-z]+", text.lower())) + filtered = words - _STOP_WORDS + return filtered if filtered else words + + +def _keyword_fallback( + question: str, meetings: list[Meeting], +) -> list[Meeting]: + """Return meetings whose summary or transcript shares keywords with *question*.""" + kw = _extract_keywords(question) + if not kw: + return [] + + matches: list[Meeting] = [] + for m in meetings: + summary_text = m.summary_path.read_text() + summary_words = set(re.findall(r"[a-z]+", summary_text.lower())) + if kw & summary_words: + matches.append(m) + continue + if m.transcript_path and m.transcript_path.exists(): + transcript_text = m.transcript_path.read_text() + transcript_words = set(re.findall(r"[a-z]+", transcript_text.lower())) + if kw & transcript_words: + matches.append(m) + return matches + + +def _find_relevant_meetings( + summarizer: Summarizer, + question: str, + meetings: list[Meeting], + context_size: int, + spinner: Spinner | None = None, +) -> list[Meeting]: + chunks = _build_summary_chunks(meetings, context_size) + all_relevant_ids: set[str] = set() + total_chunks = len(chunks) + + for i, chunk in enumerate(chunks): + if total_chunks > 1 and 
spinner is not None: + spinner.update(f"Searching meetings (batch {i + 1}/{total_chunks})") + + known_ids = {m.folder_name for m in chunk} + + # Build summaries text + summaries_parts = [] + for m in chunk: + summary_text = m.summary_path.read_text() + summaries_parts.append(f"## [{m.folder_name}]\n{summary_text}") + summaries = "\n\n".join(summaries_parts) + + prompt = SEARCH_FIND_PROMPT.format(question=question, summaries=summaries) + response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True) + + ids = _parse_relevant_ids(response) + if ids is None: + # Total fallback: include all from this chunk + all_relevant_ids.update(known_ids) + else: + # Validate: only keep IDs that exist in this chunk + all_relevant_ids.update(id_ for id_ in ids if id_ in known_ids) + + if not all_relevant_ids: + keyword_matches = _keyword_fallback(question, meetings) + if keyword_matches: + if spinner is not None: + spinner.update("Falling back to keyword search") + return _rank_meetings(question, keyword_matches) + + # Build result list preserving order, then rank + meeting_by_id = {m.folder_name: m for m in meetings} + relevant = [meeting_by_id[id_] for id_ in all_relevant_ids if id_ in meeting_by_id] + return _rank_meetings(question, relevant) + + +# -- Ranking -- + + +def _rank_meetings(question: str, meetings: list[Meeting]) -> list[Meeting]: + """Rank meetings by keyword overlap, speaker mention, and recency.""" + question_words = set(question.lower().split()) + + def score(m: Meeting) -> tuple[float, float, str]: + summary_text = m.summary_path.read_text().lower() + summary_words = set(summary_text.split()) + + # Keyword overlap + overlap = len(question_words & summary_words) + + # Speaker boost: if a capitalized word in the question appears in the summary + speaker_boost = 0.0 + for word in question.split(): + if word[0:1].isupper() and len(word) > 1 and word.lower() in summary_text: + speaker_boost = 2.0 + break + + # Recency: folder name sorts lexicographically by 
date + return (overlap + speaker_boost, speaker_boost, m.folder_name) + + return sorted(meetings, key=score, reverse=True) + + +# -- Stage 2: Answer from transcripts -- + + +def _load_transcripts(meetings: list[Meeting]) -> dict[str, str]: + """Load transcript text keyed by folder name.""" + transcripts: dict[str, str] = {} + for m in meetings: + if m.transcript_path and m.transcript_path.exists(): + transcripts[m.folder_name] = m.transcript_path.read_text() + return transcripts + + +def _answer_from_transcripts( + summarizer: Summarizer, + question: str, + meetings: list[Meeting], + context_size: int, +) -> tuple[str, int]: + effective = int(context_size * 0.8) + overhead = 1500 # system prompt + question + response headroom + budget = max(effective - overhead, 500) + + transcript_parts: list[str] = [] + used_tokens = 0 + included = 0 + skipped = 0 + + for m in meetings: + if m.transcript_path is None or not m.transcript_path.exists(): + skipped += 1 + continue + + text = m.transcript_path.read_text() + entry = f"## [{m.folder_name}] {m.display_name}\n{text}" + entry_tokens = _estimate_tokens(entry) + + if used_tokens + entry_tokens > budget: + skipped += 1 + continue + + transcript_parts.append(entry) + used_tokens += entry_tokens + included += 1 + + if not transcript_parts: + return "No transcript text available for the relevant meetings.", skipped + + transcripts_text = "\n\n".join(transcript_parts) + prompt = SEARCH_ANSWER_PROMPT.format(question=question, transcripts=transcripts_text) + return summarizer.chat(SEARCH_ANSWER_SYSTEM, prompt), skipped + + +# -- Quote verification -- + + +def _extract_quotes(text: str) -> list[str]: + """Extract quoted text from the answer (> blockquotes and "..." 
quotes).""" + quotes: list[str] = [] + + # Blockquotes: lines starting with > + blockquote_lines: list[str] = [] + for line in text.split("\n"): + stripped = line.strip() + if stripped.startswith(">"): + blockquote_lines.append(stripped.lstrip("> ").strip()) + else: + if blockquote_lines: + quotes.append(" ".join(blockquote_lines)) + blockquote_lines = [] + if blockquote_lines: + quotes.append(" ".join(blockquote_lines)) + + # Inline quotes: text in "..." + for m in re.finditer(r'"([^"]{20,})"', text): + quotes.append(m.group(1)) + + return quotes + + +def _key_phrases(quote: str, min_words: int = 8, max_words: int = 12) -> list[str]: + """Extract key phrases (8-12 word spans) from a quote for verification.""" + words = quote.split() + if len(words) <= max_words: + return [" ".join(words)] if len(words) >= 4 else [] + + phrases = [] + # Take a few spans spread across the quote + step = max(1, (len(words) - min_words) // 3) + for start in range(0, len(words) - min_words + 1, step): + end = min(start + max_words, len(words)) + phrases.append(" ".join(words[start:end])) + if len(phrases) >= 3: + break + return phrases + + +def _verify_quotes(answer: str, transcripts: dict[str, str]) -> str: + """Check quotes against transcripts and mark unverified ones.""" + if not transcripts: + return answer + + all_text = "\n".join(transcripts.values()).lower() + quotes = _extract_quotes(answer) + + unverified_quotes: set[str] = set() + for quote in quotes: + phrases = _key_phrases(quote) + if not phrases: + continue + verified = any(phrase.lower() in all_text for phrase in phrases) + if not verified: + unverified_quotes.add(quote) + + if not unverified_quotes: + return answer + + # Annotate unverified quotes in the answer + result_lines: list[str] = [] + for line in answer.split("\n"): + result_lines.append(line) + stripped = line.strip() + # Check if this line ends a blockquote that contains an unverified quote + if stripped.startswith(">"): + quote_text = stripped.lstrip("> 
").strip() + for uq in unverified_quotes: + if quote_text and quote_text in uq: + result_lines.append("[unverified]") + break + return "\n".join(result_lines) diff --git a/src/ownscribe/summarization/__init__.py b/src/ownscribe/summarization/__init__.py index e69de29..558b8e2 100644 --- a/src/ownscribe/summarization/__init__.py +++ b/src/ownscribe/summarization/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ownscribe.config import Config + from ownscribe.summarization.base import Summarizer + + +def create_summarizer(config: Config) -> Summarizer: + """Create the appropriate summarizer based on config.""" + if config.summarization.backend == "openai": + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + return OpenAISummarizer(config.summarization, config.templates) + else: + from ownscribe.summarization.ollama_summarizer import OllamaSummarizer + + return OllamaSummarizer(config.summarization, config.templates) diff --git a/src/ownscribe/summarization/base.py b/src/ownscribe/summarization/base.py index db38470..0df2930 100644 --- a/src/ownscribe/summarization/base.py +++ b/src/ownscribe/summarization/base.py @@ -16,6 +16,10 @@ def summarize(self, transcript_text: str) -> str: def generate_title(self, summary_text: str) -> str: """Generate a short meeting title from a summary.""" + @abc.abstractmethod + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + """Send a chat completion request and return the response text.""" + @abc.abstractmethod def is_available(self) -> bool: """Check if the summarization backend is reachable.""" diff --git a/src/ownscribe/summarization/ollama_summarizer.py b/src/ownscribe/summarization/ollama_summarizer.py index 5a108e5..6a90f90 100644 --- a/src/ownscribe/summarization/ollama_summarizer.py +++ b/src/ownscribe/summarization/ollama_summarizer.py @@ -17,6 +17,20 @@ def __init__(self, config: 
SummarizationConfig, templates: dict | None = None) - self._templates = templates or {} self._client = ollama.Client(host=config.host) + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + kwargs = {} + if json_mode: + kwargs["format"] = "json" + response = self._client.chat( + model=self._config.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + **kwargs, + ) + return clean_response(response["message"]["content"]) + def is_available(self) -> bool: try: self._client.list() diff --git a/src/ownscribe/summarization/openai_summarizer.py b/src/ownscribe/summarization/openai_summarizer.py index b000849..2d1128f 100644 --- a/src/ownscribe/summarization/openai_summarizer.py +++ b/src/ownscribe/summarization/openai_summarizer.py @@ -21,6 +21,42 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = None) - base_url = base_url.rstrip("/") + "/v1" self._client = openai.OpenAI(base_url=base_url, api_key="not-needed") + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + formats_to_try: list[dict | None] = [None] + if json_mode: + formats_to_try = [ + {"type": "json_object"}, + {"type": "json_schema", "json_schema": { + "name": "search_results", + "strict": True, + "schema": { + "type": "object", + "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, + "required": ["relevant"], + "additionalProperties": False, + }, + }}, + None, + ] + for fmt in formats_to_try: + try: + kwargs = {} + if fmt is not None: + kwargs["response_format"] = fmt + response = self._client.chat.completions.create( + model=self._config.model, + messages=messages, + **kwargs, + ) + return clean_response(response.choices[0].message.content or "") + except openai.BadRequestError: + continue + return "" + def 
is_available(self) -> bool: try: self._client.models.list() diff --git a/src/ownscribe/summarization/prompts.py b/src/ownscribe/summarization/prompts.py index b2df770..8ad8dd8 100644 --- a/src/ownscribe/summarization/prompts.py +++ b/src/ownscribe/summarization/prompts.py @@ -112,3 +112,38 @@ def resolve_template( def list_templates() -> list[str]: """Return the names of all built-in templates.""" return list(TEMPLATES.keys()) + + +# --- Search prompts --- + +SEARCH_FIND_SYSTEM = ( + "You are a meeting search assistant. Given meeting summaries and a question, " + "identify which meetings are relevant to answering the question. " + "You MUST choose only from the provided meeting IDs. " + "Return at most 5 meetings per request. When in doubt, include the meeting " + "— it is better to include a marginally relevant meeting than to miss one. " + 'Return a JSON object: {"relevant": ["id1", "id2"]}. ' + 'If none are relevant, return {"relevant": []}.' +) + +SEARCH_FIND_PROMPT = """Question: {question} + +Meetings: +{summaries} + +Return ONLY valid JSON: {{"relevant": ["meeting-id-1", "meeting-id-2"]}}""" + +SEARCH_ANSWER_SYSTEM = ( + "You are a meeting assistant. Answer the user's question based on the meeting " + "transcripts provided. Each transcript is prefixed with a header like " + "'## [folder-name] Date Time — Title'. " + "When quoting or referencing information, always cite the meeting it came from " + "by including the meeting display name (e.g., '2026-02-13 15:01 — Quarterly Planning'). " + "Use verbatim text from the transcript for quotes. " + "Include the speaker label and timestamp for each quote. " + "If the answer is not found in the transcripts, say so." 
+) + +SEARCH_ANSWER_PROMPT = """Question: {question} + +{transcripts}""" diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f1bc8df..fa12194 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -181,7 +181,7 @@ def test_transcribe_and_summarize(self, tmp_path): with ( mock.patch("ownscribe.pipeline._create_transcriber", return_value=mock_transcriber), - mock.patch("ownscribe.pipeline._create_summarizer", return_value=mock_summarizer), + mock.patch("ownscribe.pipeline.create_summarizer", return_value=mock_summarizer), ): _do_transcribe_and_summarize(config, audio_path, tmp_path, summarize=True) @@ -206,7 +206,7 @@ def test_summarizer_unavailable_skips_gracefully(self, tmp_path): with ( mock.patch("ownscribe.pipeline._create_transcriber", return_value=mock_transcriber), - mock.patch("ownscribe.pipeline._create_summarizer", return_value=mock_summarizer), + mock.patch("ownscribe.pipeline.create_summarizer", return_value=mock_summarizer), ): _do_transcribe_and_summarize(config, audio_path, tmp_path, summarize=True) diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..ece50d4 --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,605 @@ +"""Tests for the semantic search module.""" + +from __future__ import annotations + +from pathlib import Path + +import click + +from ownscribe.search import ( + _answer_from_transcripts, + _build_summary_chunks, + _discover_meetings, + _estimate_tokens, + _extract_keywords, + _extract_quotes, + _find_relevant_meetings, + _key_phrases, + _keyword_fallback, + _parse_relevant_ids, + _rank_meetings, + _verify_quotes, +) + +# -- Helpers -- + + +def _make_meeting_dir(base: Path, folder_name: str, summary: str, transcript: str | None = None) -> None: + folder = base / folder_name + folder.mkdir(parents=True, exist_ok=True) + (folder / "summary.md").write_text(summary) + if transcript is not None: + (folder / "transcript.md").write_text(transcript) + + +class FakeSummarizer: + 
"""A fake summarizer that returns canned responses.""" + + def __init__(self, responses: list[str] | None = None): + self.calls: list[tuple[str, str, bool]] = [] + self._responses = list(responses or []) + self._call_idx = 0 + + def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + self.calls.append((system_prompt, user_prompt, json_mode)) + if self._responses: + resp = self._responses[self._call_idx % len(self._responses)] + self._call_idx += 1 + return resp + return '{"relevant": []}' + + +# -- Discovery tests -- + + +class TestDiscoverMeetings: + def test_discover_meetings(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary of Q1 planning") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Daily standup notes") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 2 + assert skipped == 0 + # newest first + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert meetings[1].folder_name == "2026-02-12_0930_team-standup" + assert "Quarterly Planning" in meetings[0].display_name + assert "2026-02-13 15:01" in meetings[0].display_name + + def test_discover_meetings_no_slug(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-16_1433", "Summary without title") + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary of Q1 planning") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 2 + assert skipped == 0 + # newest first + assert meetings[0].folder_name == "2026-02-16_1433" + assert meetings[0].display_name == "2026-02-16 14:33" + assert meetings[1].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_discover_meetings_since_filter(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + 
_make_meeting_dir(tmp_path, "2026-01-10_1000_old-meeting", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since="2026-02-01", limit=None) + assert len(meetings) == 2 + assert all("2026-02" in m.folder_name for m in meetings) + + def test_discover_meetings_limit(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + _make_meeting_dir(tmp_path, "2026-02-11_1000_old-meeting", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=2) + assert len(meetings) == 2 + # Should be the 2 newest + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert meetings[1].folder_name == "2026-02-12_0930_team-standup" + + def test_discover_meetings_skips_no_summary(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + # Folder with no summary file + no_summary = tmp_path / "2026-02-12_0930_team-standup" + no_summary.mkdir() + (no_summary / "transcript.md").write_text("some transcript") + + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert len(meetings) == 1 + assert skipped == 1 + assert meetings[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_discover_meetings_empty_dir(self, tmp_path): + meetings, skipped = _discover_meetings(tmp_path, since=None, limit=None) + assert meetings == [] + assert skipped == 0 + + def test_discover_meetings_nonexistent_dir(self, tmp_path): + meetings, skipped = _discover_meetings(tmp_path / "nonexistent", since=None, limit=None) + assert meetings == [] + assert skipped == 0 + + +# -- Token estimation -- + + +class TestEstimateTokens: + def test_estimate_tokens(self): + assert _estimate_tokens("a" * 100) == 25 + assert _estimate_tokens("") == 0 + assert _estimate_tokens("hello world") == 2 + + +# -- Chunking -- + + +class TestBuildSummaryChunks: + def test_build_summary_chunks(self, tmp_path): 
+ # Create meetings with summaries of known size + for i in range(5): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "x" * 2000, # 500 tokens each + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Small budget that forces multiple chunks + chunks = _build_summary_chunks(meetings, context_budget=2000) + assert len(chunks) > 1 + # All meetings accounted for + all_ids = {m.folder_name for chunk in chunks for m in chunk} + assert all_ids == {m.folder_name for m in meetings} + + def test_build_summary_chunks_single(self, tmp_path): + for i in range(3): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "Short summary", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Large budget - everything fits in one chunk + chunks = _build_summary_chunks(meetings, context_budget=100000) + assert len(chunks) == 1 + assert len(chunks[0]) == 3 + + +# -- Parse relevant IDs -- + + +class TestParseRelevantIds: + def test_valid_json(self): + assert _parse_relevant_ids('{"relevant": ["id1", "id2"]}') == ["id1", "id2"] + + def test_empty_relevant(self): + assert _parse_relevant_ids('{"relevant": []}') == [] + + def test_json_with_extra_text(self): + response = 'Here are the results: {"relevant": ["id1"]} That is all.' + assert _parse_relevant_ids(response) == ["id1"] + + def test_bare_array(self): + assert _parse_relevant_ids('["id1", "id2"]') == ["id1", "id2"] + + def test_unparseable(self): + assert _parse_relevant_ids("I don't know") is None + + +# -- Keyword extraction -- + + +class TestExtractKeywords: + def test_filters_stop_words(self): + kw = _extract_keywords("What is the budget for the project?") + assert "budget" in kw + assert "project" in kw + assert "the" not in kw + assert "is" not in kw + + def test_strips_punctuation(self): + kw = _extract_keywords("Hello, world! 
How's it going?") + assert "hello" in kw + assert "world" in kw + + def test_all_stop_words_fallback(self): + kw = _extract_keywords("is the a") + # Falls back to returning all words when only stop words remain + assert len(kw) > 0 + assert "is" in kw + + def test_empty_string(self): + kw = _extract_keywords("") + assert kw == set() + + +# -- Keyword fallback -- + + +class TestKeywordFallback: + def test_match_on_summary(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "budget review fiscal quarter") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "standup tasks blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("What about the budget?", meetings) + assert len(matches) == 1 + assert matches[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_match_on_transcript_only(self, tmp_path): + _make_meeting_dir( + tmp_path, "2026-02-13_1501_quarterly-planning", + "Generic meeting notes", # summary has no useful keywords + "Alice discussed the infrastructure migration timeline", # transcript does + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("infrastructure migration", meetings) + assert len(matches) == 1 + assert matches[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_no_match(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "budget review fiscal quarter") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + matches = _keyword_fallback("kubernetes deployment", meetings) + assert len(matches) == 0 + + +# -- Find relevant meetings -- + + +class TestFindRelevantMeetings: + def test_single_batch(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary about deadlines and Q1") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Daily standup notes") + + meetings, _ = 
_discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(['{"relevant": ["2026-02-13_1501_quarterly-planning"]}']) + + result = _find_relevant_meetings(fake, "What about the deadline?", meetings, 100000) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + assert fake.calls[0][2] is True # json_mode + + def test_drops_unknown_ids(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(['{"relevant": ["2026-02-13_1501_quarterly-planning", "nonexistent-id"]}']) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + + def test_multi_batch(self, tmp_path): + for i in range(5): + _make_meeting_dir( + tmp_path, + f"2026-02-{10+i:02d}_1000_meeting-{i}", + "x" * 2000, + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Meetings are newest-first, each gets its own batch: + # batch 0=meeting-4, 1=meeting-3, 2=meeting-2, 3=meeting-1, 4=meeting-0 + responses = [ + '{"relevant": ["2026-02-14_1000_meeting-4"]}', + '{"relevant": []}', + '{"relevant": []}', + '{"relevant": ["2026-02-11_1000_meeting-1"]}', + '{"relevant": []}', + ] + fake = FakeSummarizer(responses) + + result = _find_relevant_meetings(fake, "question", meetings, context_size=2000) + result_ids = {m.folder_name for m in result} + assert "2026-02-14_1000_meeting-4" in result_ids + assert "2026-02-11_1000_meeting-1" in result_ids + + def test_json_fallback_regex(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Malformed but extractable JSON + fake = FakeSummarizer(['Sure! 
{"relevant": ["2026-02-13_1501_quarterly-planning"]}']) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + assert len(result) == 1 + + def test_total_fallback(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_quarterly-planning", "Summary") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "Summary") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # Completely unparseable + fake = FakeSummarizer(["I have no idea what to return"]) + + result = _find_relevant_meetings(fake, "question", meetings, 100000) + # All meetings should be included as fallback + assert len(result) == 2 + + +class TestFindRelevantMeetingsKeywordFallback: + def test_llm_empty_falls_back_to_keywords(self, tmp_path): + """When the LLM returns no relevant IDs, keyword fallback finds the meeting.""" + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Generic meeting summary", # summary is too vague for LLM + "Alice discussed the infrastructure migration timeline in detail", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + # LLM returns empty relevant list + fake = FakeSummarizer(['{"relevant": []}']) + + result = _find_relevant_meetings( + fake, "infrastructure migration", meetings, 100000, + ) + assert len(result) == 1 + assert result[0].folder_name == "2026-02-13_1501_quarterly-planning" + + +# -- Ranking -- + + +class TestRankMeetings: + def test_keyword_overlap(self, tmp_path): + _make_meeting_dir(tmp_path, "2026-02-13_1501_budget-review", "budget review fiscal quarter spending") + _make_meeting_dir(tmp_path, "2026-02-12_0930_team-standup", "standup tasks blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + ranked = _rank_meetings("What was discussed about the budget?", meetings) + assert ranked[0].folder_name == "2026-02-13_1501_budget-review" + + def test_speaker_boost(self, tmp_path): + _make_meeting_dir(tmp_path, 
"2026-02-13_1501_planning", "Bob discussed the timeline") + _make_meeting_dir(tmp_path, "2026-02-12_0930_standup", "Anna mentioned the deadline and blockers") + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + ranked = _rank_meetings("What did Anna say about the deadline?", meetings) + assert ranked[0].folder_name == "2026-02-12_0930_standup" + + +# -- Answer from transcripts -- + + +class TestAnswerFromTranscripts: + def test_answer(self, tmp_path): + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Summary", + "Alice: The deadline is March 15th.\nBob: Got it.", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(["Anna said the deadline is March 15th."]) + + answer, skipped = _answer_from_transcripts(fake, "What about the deadline?", meetings, 100000) + assert "March 15th" in answer + assert skipped == 0 + + def test_budget_overflow(self, tmp_path): + # Create a meeting with a very large transcript + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_big-meeting", + "Summary", + "x" * 100000, + ) + _make_meeting_dir( + tmp_path, + "2026-02-12_0930_small-meeting", + "Summary", + "Alice: Short transcript.", + ) + + meetings, _ = _discover_meetings(tmp_path, since=None, limit=None) + fake = FakeSummarizer(["Answer based on available transcripts."]) + + # Tiny budget: only the small one should fit + answer, skipped = _answer_from_transcripts(fake, "question", meetings, context_size=2000) + # The big one should be skipped, function should still return an answer + assert "Answer based on available transcripts." 
in answer + assert skipped > 0 + + +# -- Quote verification -- + + +class TestVerifyQuotes: + def test_phrase_match(self): + answer = '> The deadline for Q1 deliverables is March 15th but I think we should aim for March 10th' + transcripts = { + "meeting-1": ( + "The deadline for Q1 deliverables is March 15th but I think" + " we should aim for March 10th to have buffer time." + ), + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" not in result + + def test_not_found(self): + answer = '> The completely fabricated quote that does not exist in any transcript at all' + transcripts = { + "meeting-1": "Alice: Let's discuss the budget.\nBob: Sure, sounds good.", + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" in result + + def test_empty_transcripts(self): + answer = "> Some quote here" + result = _verify_quotes(answer, {}) + assert result == answer + + +class TestExtractQuotes: + def test_blockquotes(self): + text = "Text before\n> This is a quote\n> continued here\nText after" + quotes = _extract_quotes(text) + assert any("This is a quote" in q for q in quotes) + + def test_inline_quotes(self): + text = 'She said "The deadline is definitely March 15th for all deliverables" in the meeting.' + quotes = _extract_quotes(text) + assert any("deadline" in q for q in quotes) + + +class TestKeyPhrases: + def test_short_quote(self): + phrases = _key_phrases("too short") + assert phrases == [] + + def test_medium_quote(self): + phrases = _key_phrases("one two three four five six seven eight nine ten") + assert len(phrases) >= 1 + assert all(len(p.split()) >= 8 for p in phrases) + + +# -- Integration test -- + + +class TestAskIntegration: + def test_end_to_end(self, httpserver, tmp_path, monkeypatch): + # Set up mock meetings + _make_meeting_dir( + tmp_path, + "2026-02-13_1501_quarterly-planning", + "Discussed Q1 deadlines. 
Anna mentioned March 15th target.", + "[00:01:00] Anna: The deadline is March 15th.\n[00:01:30] Bob: Acknowledged.", + ) + _make_meeting_dir( + tmp_path, + "2026-02-12_0930_team-standup", + "Daily standup. Discussed blockers.", + "[00:00:30] Charlie: No updates.\n[00:01:00] Dave: Still blocked.", + ) + + # Stage 1 response (find) + find_response = { + "message": { + "role": "assistant", + "content": '{"relevant": ["2026-02-13_1501_quarterly-planning"]}', + }, + "done": True, + } + # Stage 2 response (answer) + answer_response = { + "message": { + "role": "assistant", + "content": 'Anna said:\n> The deadline is March 15th.\n\nThis was in the Quarterly Planning meeting.', + }, + "done": True, + } + # Auto-detect context size calls /api/show + show_response = { + "model_info": {"general.context_length": 8192}, + } + httpserver.expect_ordered_request("/api/show", method="POST").respond_with_json(show_response) + httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(find_response) + httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(answer_response) + + from ownscribe.config import Config + + config = Config() + config.output.dir = str(tmp_path) + config.summarization.host = httpserver.url_for("") + config.summarization.backend = "ollama" + config.summarization.model = "test-model" + + from ownscribe.search import ask + + output_lines: list[str] = [] + monkeypatch.setattr(click, "echo", lambda msg="": output_lines.append(str(msg))) + + ask(config, "What did Anna say about the deadline?", since=None, limit=None) + + output = "\n".join(output_lines) + assert "Found 1 relevant" in output + assert "Quarterly Planning" in output + assert "March 15th" in output + + +# -- OpenAI json_mode fallback tests -- + + +def _openai_ok_response(content: str) -> dict: + return { + "id": "chatcmpl-test", + "object": "chat.completion", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": content}, + 
"finish_reason": "stop", + } + ], + } + + +def _openai_400_response() -> tuple[dict, int]: + return ( + {"error": { + "message": "'response_format.type' must be 'json_schema' or 'text'", + "type": "invalid_request_error", + }}, + 400, + ) + + +class TestOpenAIChatJsonModeFallback: + def test_openai_chat_json_mode_fallback(self, httpserver): + """json_object and json_schema both fail → falls back to no response_format.""" + from ownscribe.config import SummarizationConfig + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + body_400, status_400 = _openai_400_response() + ep = "/v1/chat/completions" + # 1st attempt (json_object) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 2nd attempt (json_schema) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 3rd attempt (no response_format) → 200 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + _openai_ok_response('{"relevant": ["meeting-1"]}'), + ) + + cfg = SummarizationConfig() + cfg.host = httpserver.url_for("") + cfg.model = "test-model" + summarizer = OpenAISummarizer(cfg) + + result = summarizer.chat("system", "user", json_mode=True) + assert '"relevant"' in result or "meeting-1" in result + + def test_openai_chat_json_schema_fallback(self, httpserver): + """json_object fails → falls back to json_schema which succeeds.""" + from ownscribe.config import SummarizationConfig + from ownscribe.summarization.openai_summarizer import OpenAISummarizer + + body_400, status_400 = _openai_400_response() + ep = "/v1/chat/completions" + # 1st attempt (json_object) → 400 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + body_400, status=status_400, + ) + # 2nd attempt (json_schema) → 200 + httpserver.expect_ordered_request(ep, method="POST").respond_with_json( + _openai_ok_response('{"relevant": ["meeting-2"]}'), + ) + 
+ cfg = SummarizationConfig() + cfg.host = httpserver.url_for("") + cfg.model = "test-model" + summarizer = OpenAISummarizer(cfg) + + result = summarizer.chat("system", "user", json_mode=True) + assert "meeting-2" in result From 77adb795fd0dc0b2489230461f677bf3abbc0127 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:54:59 +0100 Subject: [PATCH 2/5] Improve prompts for `ask` feature to give more consistent answers --- src/ownscribe/summarization/prompts.py | 37 +++++++++++++++++++------- tests/test_search.py | 6 ++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/ownscribe/summarization/prompts.py b/src/ownscribe/summarization/prompts.py index 8ad8dd8..e6ff138 100644 --- a/src/ownscribe/summarization/prompts.py +++ b/src/ownscribe/summarization/prompts.py @@ -134,16 +134,35 @@ def list_templates() -> list[str]: Return ONLY valid JSON: {{"relevant": ["meeting-id-1", "meeting-id-2"]}}""" SEARCH_ANSWER_SYSTEM = ( - "You are a meeting assistant. Answer the user's question based on the meeting " - "transcripts provided. Each transcript is prefixed with a header like " - "'## [folder-name] Date Time — Title'. " - "When quoting or referencing information, always cite the meeting it came from " - "by including the meeting display name (e.g., '2026-02-13 15:01 — Quarterly Planning'). " - "Use verbatim text from the transcript for quotes. " - "Include the speaker label and timestamp for each quote. " - "If the answer is not found in the transcripts, say so." + "You are a meeting assistant. Answer the user's question based on the " + "meeting transcripts provided. Each transcript is prefixed with a header " + "like '## [folder-name] Date Time — Title'.\n" + "\n" + "Always use this exact format:\n" + "\n" + "1. Start with a 1-2 sentence summary answering the question.\n" + "2. 
Then list supporting quotes grouped by meeting:\n" + "\n" + "**Meeting display name**\n" + "- **SPEAKER** [timestamp]: \"Verbatim quote from the transcript.\"\n" + "\n" + "Example:\n" + "\n" + "The deadline was set for March 15th.\n" + "\n" + "**2026-02-13 15:01 — Quarterly Planning**\n" + "- **SPEAKER_03** [00:13]: \"So the deadline for Q1 deliverables is March 15th.\"\n" + "- **SPEAKER_01** [00:21]: \"They will hear from us by Friday.\"\n" + "\n" + "Rules:\n" + "- Use the meeting display name from the transcript header (after the folder name in brackets).\n" + "- Copy quotes verbatim from the transcript. Do not paraphrase.\n" + "- If the answer is not found in the transcripts, say so.\n" + "- Keep the answer concise." ) SEARCH_ANSWER_PROMPT = """Question: {question} -{transcripts}""" +{transcripts} + +Answer using the format from your instructions (summary, then quoted evidence grouped by meeting).""" diff --git a/tests/test_search.py b/tests/test_search.py index ece50d4..9f4b1a1 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -491,7 +491,11 @@ def test_end_to_end(self, httpserver, tmp_path, monkeypatch): answer_response = { "message": { "role": "assistant", - "content": 'Anna said:\n> The deadline is March 15th.\n\nThis was in the Quarterly Planning meeting.', + "content": ( + 'The deadline is March 15th.\n\n' + '**2026-02-13 15:01 — Quarterly Planning**\n' + '- **Anna** [00:01:00]: "The deadline is March 15th."' + ), }, "done": True, } From e4dcb7752e1c5d98c6948c0a2ed0cd01addfc4a5 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 14:55:15 +0100 Subject: [PATCH 3/5] Help AI agents with AGENTS.md file --- AGENTS.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..870258e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,50 @@ +# AGENTS.md + +This file provides guidance to Claude Code (claude.ai/code)
when working with code in this repository. + +## Commands + +```bash +uv run pytest # run all tests +uv run pytest -v # verbose +uv run pytest -v -k test_search # run a specific test module +uv run pytest -v -k "TestRankMeetings::test_speaker_boost" # single test +uv run ruff check src/ tests/ # lint +uv run ruff format src/ tests/ # auto-format +``` + +## Architecture + +**ownscribe** is a CLI tool for local meeting recording, transcription, and summarization. The main pipeline is: Record → Transcribe → Summarize → Output. + +### Plugin systems with abstract base classes + +Each stage has a base class in its subpackage and one or more implementations: + +- **Audio** (`audio/base.py`): `CoreAudioRecorder` (macOS, wraps a Swift binary in `swift/`) and `SoundDeviceRecorder` (cross-platform fallback). Selected in `pipeline.py:_create_recorder()`. +- **Transcription** (`transcription/base.py`): `WhisperXTranscriber` (single impl). Data models (`Segment`, `Word`, `TranscriptResult`) live in `transcription/models.py`. +- **Summarization** (`summarization/base.py`): `OllamaSummarizer` and `OpenAISummarizer`. Factory is `summarization/__init__.py:create_summarizer()` — used by both `pipeline.py` and `search.py`. +- **Output** (`output/`): `markdown.py` and `json_output.py`, selected by `config.output.format`. + +### Key modules + +- **`cli.py`** — Click command group. Entry point: `ownscribe.cli:cli`. All subcommands (`ask`, `transcribe`, `summarize`, `devices`, `apps`, `config`, `cleanup`). +- **`pipeline.py`** — Orchestrates the record → transcribe → summarize flow. Creates timestamped output dirs (`~/ownscribe/YYYY-MM-DD_HHMM_slug/`). +- **`search.py`** — Two-stage LLM search over meeting notes. Stage 1 scores summaries for relevance, stage 2 synthesizes answers from full transcripts. Has keyword fallback and quote verification. Helper functions return data; only `ask()` calls `click.echo`. 
+- **`config.py`** — Dataclass hierarchy (`Config` → `AudioConfig`, `TranscriptionConfig`, `SummarizationConfig`, etc.). Loaded from `~/.config/ownscribe/config.toml` with env var overrides (`HF_TOKEN`, `OLLAMA_HOST`). +- **`summarization/prompts.py`** — Built-in prompt templates (meeting, lecture, brief) plus search prompts. Users can define custom templates in config TOML. + +### Testing conventions + +- Uses `pytest` with `pytest-httpserver` for mocking HTTP APIs (Ollama, OpenAI). +- Shared fixtures in `conftest.py`: `sample_transcript`, `diarized_transcript`, `synthetic_wav`. +- Tests use `FakeSummarizer` (in `test_search.py`) or `unittest.mock` for pipeline tests. +- Markers: `@pytest.mark.hardware` (auto-skipped in CI), `@pytest.mark.macos` (auto-skipped on non-macOS). +- When mocking the shared summarizer factory in pipeline tests, patch `ownscribe.pipeline.create_summarizer` (it's imported at module level). + +## Style + +- Python 3.12+. Ruff with line-length 120. +- `from __future__ import annotations` in all modules. +- Lazy imports for heavy dependencies (whisperx, ollama, openai) — imported inside functions, not at module level. +- Helper functions return data; orchestrator functions (`ask()`, `run_pipeline()`) handle all `click.echo` output. 
From c8d368351839a0935dae91362d2d592b616e0b88 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 15:08:17 +0100 Subject: [PATCH 4/5] Address GitHub's Copilot comments --- src/ownscribe/search.py | 28 ++++++++++++++++++++++------ tests/test_search.py | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py index 33e29fd..2393d43 100644 --- a/src/ownscribe/search.py +++ b/src/ownscribe/search.py @@ -43,7 +43,15 @@ def ask(config: Config, question: str, since: str | None, limit: int | None) -> if skipped: click.echo(f"({skipped} meetings without summaries were skipped)") + if not config.summarization.enabled: + click.echo("Summarization must be enabled to use ask. Check your configuration.") + return + summarizer = create_summarizer(config) + if not summarizer.is_available(): + click.echo("Summarization backend is not reachable. Check your configuration.") + return + context_size = _resolve_context_size(config) # Stage 1 @@ -353,7 +361,7 @@ def _find_relevant_meetings( spinner.update("Falling back to keyword search") return _rank_meetings(question, keyword_matches) - # Build result list preserving order, then rank + # Build result list from unique IDs, then rank meeting_by_id = {m.folder_name: m for m in meetings} relevant = [meeting_by_id[id_] for id_ in all_relevant_ids if id_ in meeting_by_id] return _rank_meetings(question, relevant) @@ -410,7 +418,6 @@ def _answer_from_transcripts( transcript_parts: list[str] = [] used_tokens = 0 - included = 0 skipped = 0 for m in meetings: @@ -428,7 +435,6 @@ def _answer_from_transcripts( transcript_parts.append(entry) used_tokens += entry_tokens - included += 1 if not transcript_parts: return "No transcript text available for the relevant meetings.", skipped @@ -469,7 +475,7 @@ def _key_phrases(quote: str, min_words: int = 8, max_words: int = 12) -> list[st """Extract key phrases (8-12 word spans) from a quote for 
verification.""" words = quote.split() if len(words) <= max_words: - return [" ".join(words)] if len(words) >= 4 else [] + return [" ".join(words)] if len(words) >= min_words else [] phrases = [] # Take a few spans spread across the quote @@ -505,13 +511,23 @@ def _verify_quotes(answer: str, transcripts: dict[str, str]) -> str: # Annotate unverified quotes in the answer result_lines: list[str] = [] for line in answer.split("\n"): - result_lines.append(line) stripped = line.strip() - # Check if this line ends a blockquote that contains an unverified quote + annotated = False + # Blockquote annotation if stripped.startswith(">"): quote_text = stripped.lstrip("> ").strip() for uq in unverified_quotes: if quote_text and quote_text in uq: + result_lines.append(line) result_lines.append("[unverified]") + annotated = True break + # Inline quote annotation: mark "..." segments that are unverified + if not annotated: + modified = line + for uq in unverified_quotes: + target = f'"{uq}"' + if target in modified: + modified = modified.replace(target, f'{target} [unverified]') + result_lines.append(modified) return "\n".join(result_lines) diff --git a/tests/test_search.py b/tests/test_search.py index 9f4b1a1..fa033b6 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -431,6 +431,31 @@ def test_not_found(self): result = _verify_quotes(answer, transcripts) assert "[unverified]" in result + def test_inline_quote_not_found(self): + answer = ( + 'Anna said "The completely fabricated quote that does not' + ' exist in any transcript at all" in the meeting.' + ) + transcripts = { + "meeting-1": "Alice: Let's discuss the budget.\nBob: Sure, sounds good.", + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" in result + + def test_inline_quote_verified(self): + answer = ( + 'Anna said "The deadline for Q1 deliverables is March 15th' + ' but I think we should aim for March 10th" in the meeting.' 
+ ) + transcripts = { + "meeting-1": ( + "The deadline for Q1 deliverables is March 15th but I think" + " we should aim for March 10th to have buffer time." + ), + } + result = _verify_quotes(answer, transcripts) + assert "[unverified]" not in result + def test_empty_transcripts(self): answer = "> Some quote here" result = _verify_quotes(answer, {}) @@ -503,6 +528,7 @@ def test_end_to_end(self, httpserver, tmp_path, monkeypatch): show_response = { "model_info": {"general.context_length": 8192}, } + httpserver.expect_ordered_request("/api/tags", method="GET").respond_with_json({"models": []}) httpserver.expect_ordered_request("/api/show", method="POST").respond_with_json(show_response) httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(find_response) httpserver.expect_ordered_request("/api/chat", method="POST").respond_with_json(answer_response) From b809eccc28fe66c7413f7e7bb7f176f9e4bd0e93 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Wed, 18 Feb 2026 15:20:37 +0100 Subject: [PATCH 5/5] Keep `summarizer.chat` generic, move JSON schema into caller --- src/ownscribe/search.py | 13 ++++++++- src/ownscribe/summarization/base.py | 5 +++- .../summarization/ollama_summarizer.py | 5 +++- .../summarization/openai_summarizer.py | 27 +++++++++---------- tests/test_search.py | 11 +++++--- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/ownscribe/search.py b/src/ownscribe/search.py index 2393d43..4135f4b 100644 --- a/src/ownscribe/search.py +++ b/src/ownscribe/search.py @@ -23,6 +23,17 @@ _DEFAULT_CONTEXT_SIZE = 8192 +_SEARCH_RESULTS_SCHEMA = { + "name": "search_results", + "strict": True, + "schema": { + "type": "object", + "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, + "required": ["relevant"], + "additionalProperties": False, + }, +} + class Meeting(NamedTuple): folder_name: str @@ -344,7 +355,7 @@ def _find_relevant_meetings( summaries = "\n\n".join(summaries_parts) prompt = 
SEARCH_FIND_PROMPT.format(question=question, summaries=summaries) - response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True) + response = summarizer.chat(SEARCH_FIND_SYSTEM, prompt, json_mode=True, json_schema=_SEARCH_RESULTS_SCHEMA) ids = _parse_relevant_ids(response) if ids is None: diff --git a/src/ownscribe/summarization/base.py b/src/ownscribe/summarization/base.py index 0df2930..0858645 100644 --- a/src/ownscribe/summarization/base.py +++ b/src/ownscribe/summarization/base.py @@ -17,7 +17,10 @@ def generate_title(self, summary_text: str) -> str: """Generate a short meeting title from a summary.""" @abc.abstractmethod - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: """Send a chat completion request and return the response text.""" @abc.abstractmethod diff --git a/src/ownscribe/summarization/ollama_summarizer.py b/src/ownscribe/summarization/ollama_summarizer.py index 6a90f90..0b7e860 100644 --- a/src/ownscribe/summarization/ollama_summarizer.py +++ b/src/ownscribe/summarization/ollama_summarizer.py @@ -17,7 +17,10 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = None) - self._templates = templates or {} self._client = ollama.Client(host=config.host) - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: kwargs = {} if json_mode: kwargs["format"] = "json" diff --git a/src/ownscribe/summarization/openai_summarizer.py b/src/ownscribe/summarization/openai_summarizer.py index 2d1128f..9bf771b 100644 --- a/src/ownscribe/summarization/openai_summarizer.py +++ b/src/ownscribe/summarization/openai_summarizer.py @@ -21,27 +21,24 @@ def __init__(self, config: SummarizationConfig, templates: dict | None = 
None) - base_url = base_url.rstrip("/") + "/v1" self._client = openai.OpenAI(base_url=base_url, api_key="not-needed") - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] formats_to_try: list[dict | None] = [None] if json_mode: - formats_to_try = [ - {"type": "json_object"}, - {"type": "json_schema", "json_schema": { - "name": "search_results", - "strict": True, - "schema": { - "type": "object", - "properties": {"relevant": {"type": "array", "items": {"type": "string"}}}, - "required": ["relevant"], - "additionalProperties": False, - }, - }}, - None, - ] + if json_schema is not None: + formats_to_try = [ + {"type": "json_object"}, + {"type": "json_schema", "json_schema": json_schema}, + None, + ] + else: + formats_to_try = [{"type": "json_object"}, None] for fmt in formats_to_try: try: kwargs = {} diff --git a/tests/test_search.py b/tests/test_search.py index fa033b6..93426d9 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -40,7 +40,10 @@ def __init__(self, responses: list[str] | None = None): self._responses = list(responses or []) self._call_idx = 0 - def chat(self, system_prompt: str, user_prompt: str, json_mode: bool = False) -> str: + def chat( + self, system_prompt: str, user_prompt: str, + json_mode: bool = False, json_schema: dict | None = None, + ) -> str: self.calls.append((system_prompt, user_prompt, json_mode)) if self._responses: resp = self._responses[self._call_idx % len(self._responses)] @@ -607,7 +610,8 @@ def test_openai_chat_json_mode_fallback(self, httpserver): cfg.model = "test-model" summarizer = OpenAISummarizer(cfg) - result = summarizer.chat("system", "user", json_mode=True) + schema = {"name": "test", "strict": True, "schema": {"type": "object"}} + result = 
summarizer.chat("system", "user", json_mode=True, json_schema=schema) assert '"relevant"' in result or "meeting-1" in result def test_openai_chat_json_schema_fallback(self, httpserver): @@ -631,5 +635,6 @@ def test_openai_chat_json_schema_fallback(self, httpserver): cfg.model = "test-model" summarizer = OpenAISummarizer(cfg) - result = summarizer.chat("system", "user", json_mode=True) + schema = {"name": "test", "strict": True, "schema": {"type": "object"}} + result = summarizer.chat("system", "user", json_mode=True, json_schema=schema) assert "meeting-2" in result