Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
466b526
fix(llm): use x-goog-api-key header for Gemini
starc007 May 12, 2026
19143d8
fix(llm): validate OPENAI_BASE_URL scheme and host
starc007 May 12, 2026
fee797a
fix(llm): surface body-read failures in API error messages
starc007 May 12, 2026
da12d34
fix(cli): remove dead --file flag from review command
starc007 May 12, 2026
79a3588
fix(update): require confirmation before piping install script to sh
starc007 May 12, 2026
953666d
fix(prompt): warn when truncating diff drops files
starc007 May 12, 2026
2b5d263
fix(prompt): correct duplicate section comment numbering
starc007 May 12, 2026
1b32784
fix(linters): canonicalize paths before matching against diff
starc007 May 12, 2026
557bc7c
fix(config): reject unsafe ignore-glob patterns
starc007 May 12, 2026
f74737d
fix(context): cap per-file size and total files walked
starc007 May 12, 2026
cac271d
fix(history): walk char boundaries when stripping line refs
starc007 May 12, 2026
1adb69f
fix(git): keep .git assertion when canonicalize fails
starc007 May 12, 2026
2ce3bb5
chore: drop dead code paths
starc007 May 12, 2026
5139def
feat(review): validate findings against the diff before reporting
starc007 May 12, 2026
add962c
feat(review): prefer Added lines when re-anchoring findings
starc007 May 12, 2026
172ce61
feat(prompt): add grounding rules + severity rubric to system prompt
starc007 May 12, 2026
53342fa
feat(review): self-critique pass to drop low-signal findings
starc007 May 12, 2026
abb6df8
feat(llm): Anthropic prompt caching for stable prefix
starc007 May 12, 2026
020b151
feat(review): semantic chunking for diffs over the token budget
starc007 May 12, 2026
598781f
feat(output): attach the cited source line to every accepted finding
starc007 May 12, 2026
1112419
chore(prompt): drop string-form build_review_prompt_ctx
starc007 May 12, 2026
0f008d2
docs: document validator, critique, chunking, prompt caching
starc007 May 12, 2026
e78a423
perf(context): build a repo index once, reuse it across chunks
starc007 May 12, 2026
07d77f5
fix(context): disambiguate same-named functions by caller location
starc007 May 12, 2026
0c508a6
perf(context): distill called-function bodies before prompting
starc007 May 12, 2026
b915d14
perf(prompt): filter type fields to those referenced in the diff
starc007 May 12, 2026
dea434a
perf(context): tiered token budget for types / called fns / tests
starc007 May 12, 2026
9b48273
perf(prompt): tighten token estimate to ~3.5 chars/token
starc007 May 12, 2026
1702f8e
fix: spinner Drop guard and O(1) suffix lookup in validator
starc007 May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 59 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@ context: Rich (4 types, 6 called fns, 2 tests)
[!] HIGH src/payments/processor.rs:142
balance + amount can exceed i64::MAX when processing large transfers;
use checked_add() and return Err on overflow
│ let total = balance + amount;

[~] MED src/payments/processor.rs:98
db.execute() result is silently ignored; if the INSERT fails, the caller
receives a success response while the data was never written
│ db.execute(stmt);

[✓] LGTM src/utils/format.rs — change looks correct

3 findings (1 high, 1 med, 0 low) · 4.2s · qwen2.5-coder:14b
note: dropped 1 hallucinated finding(s) (line/file not in diff)
note: critique dropped 1 finding(s) as low-signal
```

---
Expand Down Expand Up @@ -83,6 +87,7 @@ Options:
--fail-on <SEV> Exit 1 if any finding at or above this severity [low|med|high]
--security Security-focused review mode
--no-cloud Never use a cloud LLM
--no-critique Skip the self-critique pass (faster, slightly noisier)
--path <PATH> Path to git repo (default: current directory)
```

Expand Down Expand Up @@ -114,6 +119,7 @@ crev init --ci --model gpt-4o # same, with OpenAI model + correct secret name
```

Installs two hooks:

- **pre-commit**: reviews staged changes before every commit
- **pre-push**: reviews all unpushed commits only when pushing more than one — single commits are already covered by pre-commit

Expand Down Expand Up @@ -194,6 +200,8 @@ export OPENAI_BASE_URL=https://api.groq.com
crev review --model llama-3.1-70b-versatile
```

`OPENAI_BASE_URL` is validated: must use https and must not point at loopback or private hosts. For local vLLM/Ollama proxies set `CREV_ALLOW_INSECURE_BASE_URL=1`.

### Ollama local models

Any model available in your Ollama instance works — crev picks the best one it finds automatically. Pull any coding model and crev will use it:
Expand Down Expand Up @@ -250,7 +258,6 @@ Personal defaults (model choice, API keys) go in `~/.config/crev/config.toml`

**Config lookup order:** `.reviewrc` (current dir → upward) → `~/.config/crev/config.toml` → built-in defaults.


## Output formats

### Terminal (default)
Expand Down Expand Up @@ -281,7 +288,8 @@ A spinner shows while the model analyzes. Each finding streams to the terminal a
"severity": "High",
"file": "src/payments/processor.rs",
"line": 142,
"message": "balance + amount can exceed i64::MAX — use checked_add()"
"message": "balance + amount can exceed i64::MAX — use checked_add()",
"quote": "let total = balance + amount;"
}
],
"github_annotations": [
Expand Down Expand Up @@ -335,11 +343,11 @@ Then commit and push:

**Triggers:**

| Event | Behaviour |
|---|---|
| PR opened | Runs automatically |
| `/crev` comment | Runs on demand, reacts with 👀 to acknowledge |
| Push to PR branch | Does not run — comment `/crev` to re-review |
| Event | Behaviour |
| ----------------- | --------------------------------------------- |
| PR opened | Runs automatically |
| `/crev` comment | Runs on demand, reacts with 👀 to acknowledge |
| Push to PR branch | Does not run — comment `/crev` to re-review |

---

Expand All @@ -356,6 +364,50 @@ This surfaces systemic issues that keep slipping through code review.

---

## Review quality

crev does more than just pipe a diff into an LLM. Every finding goes through three filters before it reaches your terminal:

### 1. Diff-aware validation

LLMs regularly cite line numbers that don't exist in the diff — off-by-one from header lines, completely fabricated, or on the wrong file entirely. crev indexes every `(file, line)` pair the model was shown and:

- **Drops** findings whose line/file isn't in the diff (`note: dropped N hallucinated finding(s)`).
- **Re-anchors** findings within ±3 lines to the nearest real line, preferring lines the change *added* over surrounding context.
- **Flags** findings that landed on a context line — they describe pre-existing code the change merely touches, not the change itself.

### 2. Self-critique pass

After the first review, crev sends the findings back through the model with a sharper "is each one specific, grounded, and actionable?" prompt. The model emits `KEEP` or `DROP` per finding; low-signal ones are removed before you see them. Skip with `--no-critique` if you want raw output.

### 3. Source quotes

Every accepted finding carries the cited source line as a `quote`, rendered dimmed under the description so you can judge correctness without leaving the terminal:

```
[!] HIGH src/payments/processor.rs:142
balance + amount can exceed i64::MAX — use checked_add()
│ let total = balance + amount;
```

---

## Performance

### Chunking for large diffs

When the rendered diff exceeds `max_tokens`, crev splits it by file and runs the review across multiple chunks rather than dropping the biggest files entirely. Findings are merged, validated, and critiqued as one set.

### Prompt caching

The Anthropic backend uses Anthropic's prompt-caching API to mark the system prompt and stable user-block as cacheable. The diff and context (which change per call) stay uncached. The provider returns ~90% cost discount on the cached prefix within a 5-minute window — pre-commit hooks that re-review the same change multiple times effectively pay for the diff alone after the first call.

### Self-update

`crev update` downloads the install script, shows the URL, and asks for confirmation before running it. Skip the prompt with `CREV_UPDATE_YES=1` for non-interactive environments.

---

## License

HEHEHEHEHE
109 changes: 82 additions & 27 deletions docs/context.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ Architecture reference for contributors and AI assistants working on this repo.

| File | Purpose |
|---|---|
| `src/main.rs` | CLI entrypoint, clap commands, review orchestration, spinner, streaming output |
| `src/llm.rs` | Trait-based LLM backend system (Ollama, Anthropic, OpenAI, Gemini) |
| `src/main.rs` | CLI entrypoint, clap commands, chunk-aware review orchestration, spinner, streaming output |
| `src/llm.rs` | Trait-based LLM backend system (Ollama, Anthropic, OpenAI, Gemini), prompt caching support via `PromptParts` |
| `src/ollama.rs` | Ollama HTTP client — streaming, model detection, health check |
| `src/git.rs` | git2 wrapper — staged/unstaged/commit/range diffs → `ParsedDiff` |
| `src/ast.rs` | tree-sitter multi-language parser — functions, types, call graph |
| `src/context.rs` | Builds `ReviewContext` from a diff: finds changed fns, resolves call defs, fits into token budget |
| `src/prompt.rs` | Assembles the final LLM prompt from `ReviewContext` + config rules |
| `src/output.rs` | Parses LLM output lines into `Finding` structs, pretty-prints with colors, JSON output |
| `src/config.rs` | Loads `.reviewrc` (repo-local) and `~/.config/crev/config.toml` (global) |
| `src/prompt.rs` | Assembles the LLM prompt; emits `PromptPartsOwned` for cache-aware backends; chunks oversize diffs |
| `src/output.rs` | Parses LLM output lines into `Finding` structs (incl. cited source `quote`), pretty-prints with colors, JSON output |
| `src/validate.rs` | Diff-aware validator: drops findings citing lines not in the diff, re-anchors near-misses, attaches source quote |
| `src/critique.rs` | Second LLM pass that filters low-signal findings (KEEP/DROP per finding); opt-out via `--no-critique` |
| `src/config.rs` | Loads `.reviewrc` (repo-local) and `~/.config/crev/config.toml` (global), guards ignore-glob patterns |
| `src/history.rs` | SQLite review history — saves reviews, detects recurring patterns |
| `src/linters.rs` | Runs language linters (clippy, eslint, ruff, golangci-lint) and filters findings to diff lines |
| `src/linters.rs` | Runs language linters (clippy, eslint, ruff, golangci-lint, semgrep) and filters findings to diff lines |

---

Expand All @@ -28,16 +30,23 @@ Architecture reference for contributors and AI assistants working on this repo.
git diff
└─▶ git.rs::get_*_diff()
└─▶ ParsedDiff { files[], stats }
└─▶ context.rs::ContextBuilder::build()
├─ ast.rs → functions_changed, called_functions, test_functions
├─ token budget fit (priority: diff > types > called fns > tests)
└─▶ ReviewContext
└─▶ prompt.rs::build_review_prompt()
└─▶ String (prompt)
└─▶ llm.rs::resolve() → backend.complete()
└─▶ on_token callback (line buffering → mpsc channel)
└─▶ output.rs::try_parse_finding_line()
└─▶ Finding[] → print / JSON
└─▶ ignore-glob filter (config.rs)
└─▶ linters::run_linters() (once, whole diff)
└─▶ prompt::chunk_diff_by_files()
└─▶ for each chunk:
├─ context.rs::ContextBuilder::build() → ReviewContext
├─ prompt::build_review_prompt_parts_ctx() → PromptPartsOwned
└─ backend.complete_parts(parts) (Anthropic: cache_control on system+cacheable)
└─▶ on_token → mpsc channel → output::try_parse_finding_line()
└─▶ raw Finding[]
└─▶ validate::DiffIndex.apply() per finding
├─ drop if line/file not in diff
├─ re-anchor (prefer Added) if within ±3 lines
├─ attach source quote
└─▶ kept Finding[]
└─▶ critique::run_critique() (KEEP/DROP per finding)
└─▶ surviving Finding[]
└─▶ print / JSON / history / fail_on
```

---
Expand All @@ -61,11 +70,28 @@ ReviewContext { diff, functions_changed, called_functions, types_used, test_func
ContextQuality = Rich | Partial | Minimal

// output.rs
Finding { severity: Severity, file, line, message }
Finding { severity: Severity, file, line, message, quote: Option<String> }
Severity = High | Med | Low | Lgtm

// validate.rs
DiffIndex { files: HashMap<String, FileLines> }
FileLines { added: HashSet<u32>, context: HashSet<u32>, content: HashMap<u32, String> }
Validation = Accept { on_change } | Reanchor { from, to, on_change } | Drop(DropReason)
DropReason = UnknownFile | LineNotInDiff

// critique.rs
CritiqueResult { kept: Vec<Finding>, dropped: Vec<(Finding, String)> }

// llm.rs
trait LlmBackend { complete(prompt, on_token) -> Result<String>; name(); is_local() }
PromptParts<'a> { system: &str, cacheable: &str, dynamic: &str }
trait LlmBackend {
complete(prompt, on_token) -> Result<String>;
complete_parts(parts, on_token) -> Result<String>; // default impl concatenates
name(); is_local();
}

// prompt.rs
PromptPartsOwned { system: String, cacheable: String, dynamic: String }
```

---
Expand All @@ -82,8 +108,11 @@ Each backend implements `LlmBackend::complete()` which streams tokens via the `o

**API key env vars:**
- Anthropic: `ANTHROPIC_API_KEY` (or `api_key_env` in config)
- OpenAI: `OPENAI_API_KEY`, base URL override: `OPENAI_BASE_URL`
- Gemini: `GEMINI_API_KEY` or `GOOGLE_API_KEY`
- OpenAI: `OPENAI_API_KEY`, base URL override: `OPENAI_BASE_URL` (validated: https + non-private host; set `CREV_ALLOW_INSECURE_BASE_URL=1` for local proxies)
- Gemini: `GEMINI_API_KEY` or `GOOGLE_API_KEY` (sent via `x-goog-api-key` header, never in URL)

**Prompt caching (Anthropic only):**
`AnthropicBackend::complete_parts` sets `cache_control: { type: "ephemeral" }` on the system block and the stable `cacheable` user block (team rules + output format). The `dynamic` block (diff + context + linter findings) stays uncached. 90% cost discount on the cached prefix within a 5-minute window. Anthropic-only — other backends concatenate via the default trait impl.

---

Expand All @@ -104,18 +133,44 @@ Token budget priority: diff content → type defs → called function bodies →
- `Partial` — some context but not rich
- `Minimal` — diff only

**Safety caps:** files over 1 MiB are skipped; the walker stops after 5,000 source files (prevents pathological repos from making `build()` run for minutes).

---

## Chunking (`src/prompt.rs::chunk_diff_by_files`)

When the rendered diff exceeds `max_tokens * 4` chars, `chunk_diff_by_files` greedily packs files into chunks under budget. Files that exceed budget alone become their own chunk and the existing truncator trims their context lines further at prompt-build time. `run_review` loops over chunks, building context and prompt per chunk; linter findings come from a single whole-diff run and are filtered per chunk.

---

## Validation + critique (`src/validate.rs`, `src/critique.rs`)

After each chunk's LLM call, raw findings pass through `DiffIndex::apply`:
- Line in **Added** set → accept, mark `on_change: true`
- Line in **Context** set → accept, mark `on_change: false` (surfaces in summary as "references unchanged context lines")
- Within ±3 lines of a real line → re-anchor (prefer Added)
- Otherwise → drop (`UnknownFile` or `LineNotInDiff`)

Accepted findings get the cited source line attached as `quote`.

After all chunks have been validated, `critique::run_critique` runs one more LLM call asking the model to grade each kept finding as KEEP or DROP. Unparsed decisions default to KEEP so a broken critique response can't hide a real finding. Skip with `--no-critique` to recover the live-streaming UX.

---

## Streaming output (`src/main.rs`)

Findings stream to the terminal as soon as each line arrives from the LLM:
Two modes:

- **Live stream** (single-chunk path, `--no-critique`, non-JSON):
1. Spinner runs on a separate tokio task (watch channel stop signal)
2. `on_token` callback buffers tokens, sends complete lines via unbounded mpsc channel
3. Consumer loop receives lines, calls `output::try_parse_finding_line()`
4. On first finding: stops spinner, clears spinner line
5. Each finding is printed immediately via `output::print_finding()`

- **Batched** (chunking active OR critique active OR JSON): findings are collected silently across all chunks, validated, critiqued, then printed in one block. A finding that critique would later drop is never shown.

1. Spinner runs on a separate tokio task (watch channel stop signal)
2. `on_token` callback buffers tokens, sends complete lines via unbounded mpsc channel
3. Consumer loop receives lines, calls `output::try_parse_finding_line()`
4. On first finding: stops spinner, clears spinner line
5. Each finding is printed immediately via `output::print_finding()`
6. After `complete()` returns: print summary line
Run-summary footer surfaces re-anchor count, context-only count, dropped-hallucination list, and critique-dropped list so the user sees how noisy the model was.

---

Expand Down
25 changes: 25 additions & 0 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,31 @@ pub enum TypeKind {
Class,
}

/// Maximum lines we'll ship for a single called-function body before
/// distilling. Most "what does this called function do?" questions are
/// answered by its first ~12 lines and its return statement; shipping a
/// 200-line body burns context budget for negligible signal gain.
pub const MAX_CALLED_FN_LINES: usize = 15;

/// Render a function for inclusion in the prompt: full body if short, or
/// a head + tail slice with an "N lines omitted" marker otherwise.
pub fn distill_function(info: &FunctionInfo, max_lines: usize) -> String {
let lines: Vec<&str> = info.full_text.lines().collect();
if lines.len() <= max_lines {
return info.full_text.clone();
}
let head_take = max_lines.saturating_sub(2).max(1);
let head: Vec<&str> = lines.iter().take(head_take).copied().collect();
let tail = lines.last().copied().unwrap_or("}");
let omitted = lines.len().saturating_sub(head_take + 1);
format!(
"{}\n // ... {} lines omitted ...\n{}",
head.join("\n"),
omitted,
tail
)
}

// ── parser ───────────────────────────────────────────────────────────────────

pub struct AstParser {
Expand Down
28 changes: 28 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,14 @@ pub fn should_ignore_file(path: &Path, config: &Config) -> bool {
let path_str = path.to_string_lossy();

for pattern in &config.ignore.paths {
if !is_safe_ignore_pattern(pattern) {
eprintln!(
"warning: ignoring unsafe glob pattern {:?} \
(must be repo-relative, no '..' or absolute paths)",
pattern
);
continue;
}
if glob_match(pattern, &path_str) {
return true;
}
Expand All @@ -197,6 +205,26 @@ pub fn should_ignore_file(path: &Path, config: &Config) -> bool {
false
}

fn is_safe_ignore_pattern(pattern: &str) -> bool {
if pattern.is_empty() {
return false;
}
if pattern.starts_with('/') || pattern.starts_with('\\') {
return false;
}
// Windows-style drive prefix
if pattern.len() >= 2 && pattern.chars().nth(1) == Some(':') {
return false;
}
// Reject any '..' segment (handles `../`, `..\\`, and a trailing `..`).
for segment in pattern.split(|c| c == '/' || c == '\\') {
if segment == ".." {
return false;
}
}
true
}

fn glob_match(pattern: &str, path: &str) -> bool {
// Use the glob crate for matching
if let Ok(pat) = glob::Pattern::new(pattern) {
Expand Down
Loading