Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .githooks/pre-commit
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if [ $? -ne 0 ]; then
fi

# Clippy
cargo clippy --workspace -- -D warnings
cargo clippy --workspace --all-targets -- -D warnings
if [ $? -ne 0 ]; then
echo "Clippy check failed. Fix warnings before committing."
exit 1
Expand Down
27 changes: 11 additions & 16 deletions .github/workflows/bench-regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,13 @@ jobs:
cargo bench -p lattice-inference --bench elementwise_cpu_bench --no-run
cargo bench -p lattice-embed --bench simd --no-run

- name: Run benches against baseline
if: steps.fetch_baseline.outputs.have_baseline == 'true'
run: |
cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --baseline base --noplot
cargo bench -p lattice-embed --bench simd -- --baseline base --noplot

- name: Run benches without baseline (seed)
if: steps.fetch_baseline.outputs.have_baseline == 'false'
- name: Run benches
run: |
cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --save-baseline base --noplot
cargo bench -p lattice-embed --bench simd -- --save-baseline base --noplot
# --save-baseline saves new data AND compares against existing baseline
# if present. Unlike --baseline, it doesn't panic when a bench group
# has no prior baseline (e.g., newly added bench groups).
cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --save-baseline base --noplot --quick
cargo bench -p lattice-embed --bench simd -- --save-baseline base --noplot --quick

- name: Apply gate
id: gate
Expand All @@ -98,15 +94,14 @@ jobs:
&& echo "gate=pass" >> "$GITHUB_OUTPUT" \
|| echo "gate=fail" >> "$GITHUB_OUTPUT"

- name: Note seed run
- name: Note no baseline
if: steps.fetch_baseline.outputs.have_baseline == 'false'
run: |
mkdir -p .
cat > report-${{ matrix.arch }}.md <<EOF
### \`${{ matrix.arch }}\` — no baseline available
cat > report-${{ matrix.arch }}.md <<'EOF'
### `${{ matrix.arch }}` — no baseline available

The \`perf-baselines\` branch has no data for this arch yet. Run
\`bench-update.yml\` on \`main\` to seed it. This PR is not gated.
The `perf-baselines` branch has no data for this arch yet. Run
`bench-update.yml` on `main` to seed it. This PR is not gated.
EOF

- name: Upload report
Expand Down
2 changes: 1 addition & 1 deletion crates/inference/benches/metrics_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fn naive_entropy_nats(logits: &[f32]) -> f32 {
if logits.len() < 2 {
return 0.0;
}
let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let mut sum_exp = 0.0_f32;
let mut exps: Vec<f32> = logits
.iter()
Expand Down
2 changes: 1 addition & 1 deletion crates/inference/src/attention/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ mod attention_kind_tests {
head_dim: 128,
};
let kind = AttentionKind::Gqa(cfg);
let cloned = kind.clone();
let cloned = kind;
assert_eq!(cloned.name(), "gqa");
if let AttentionKind::Gqa(c) = cloned {
assert_eq!(c.num_heads, 32);
Expand Down
23 changes: 9 additions & 14 deletions crates/inference/src/kv_cache/flat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -675,10 +675,10 @@ mod tests {
};
let kv_dim = 2 * 4; // 8
// f16: 2 * 1 * 16 * 8 * 2 = 512 bytes
let expected_f16 = 2 * 1 * 16 * kv_dim * std::mem::size_of::<f16>();
let expected_f16 = 2 * 16 * kv_dim * std::mem::size_of::<f16>();
assert_eq!(config.total_bytes(), expected_f16);
// Would have been 1024 with f32
let would_be_f32 = 2 * 1 * 16 * kv_dim * std::mem::size_of::<f32>();
let would_be_f32 = 2 * 16 * kv_dim * std::mem::size_of::<f32>();
assert_eq!(config.total_bytes() * 2, would_be_f32);
}

Expand Down Expand Up @@ -947,8 +947,7 @@ mod tests {
// Measured relative error must be < 0.1% (0.001)
assert!(
max_rel_kv < 0.001,
"max relative error for KV in [-10,10] is {:.4e}, expected < 0.001",
max_rel_kv
"max relative error for KV in [-10,10] is {max_rel_kv:.4e}, expected < 0.001"
);
}

Expand Down Expand Up @@ -1023,7 +1022,7 @@ mod tests {
// Phase 2: stable softmax
let max_s = scores[..kv_seq_len]
.iter()
.cloned()
.copied()
.fold(f32::NEG_INFINITY, f32::max);
let sum: f32 = scores[..kv_seq_len]
.iter_mut()
Expand Down Expand Up @@ -1227,31 +1226,27 @@ mod tests {

let top1_rate = top1_match_count as f32 / total_cases as f32;
eprintln!(
"\n=== Tensor Oracle Summary ===\n logit_max_abs_diff = {:.4e} (gate: < 0.02)\n top1_match_rate = {:.4} (gate: >= 0.95)\n nan_count = {}\n max_synth_nll_delta= {:.4e} (gate: < 0.01)",
global_max_logit_diff, top1_rate, nan_count, max_synth_nll_delta
"\n=== Tensor Oracle Summary ===\n logit_max_abs_diff = {global_max_logit_diff:.4e} (gate: < 0.02)\n top1_match_rate = {top1_rate:.4} (gate: >= 0.95)\n nan_count = {nan_count}\n max_synth_nll_delta= {max_synth_nll_delta:.4e} (gate: < 0.01)"
);

assert_eq!(nan_count, 0, "f16 KV dequant introduced NaN in logits");
assert!(
global_max_logit_diff < 0.02,
"logit_max_abs_diff {:.4e} >= 0.02 gate",
global_max_logit_diff
"logit_max_abs_diff {global_max_logit_diff:.4e} >= 0.02 gate"
);
assert!(
top1_rate >= 0.95,
"top1_match_rate {:.4} < 0.95 gate",
top1_rate
"top1_match_rate {top1_rate:.4} < 0.95 gate"
);
assert!(
max_synth_nll_delta < 0.01,
"max synthetic NLL delta {:.4e} >= 0.01",
max_synth_nll_delta
"max synthetic NLL delta {max_synth_nll_delta:.4e} >= 0.01"
);
}

/// Compute log softmax probability for target token (for synthetic NLL).
fn softmax_log_prob(logits: &[f32], target: usize) -> f32 {
let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let sum: f32 = logits.iter().map(|&l| (l - max_l).exp()).sum();
let log_sum = sum.ln();
(logits[target] - max_l) - log_sum
Expand Down
6 changes: 3 additions & 3 deletions crates/inference/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ mod tests {
let online_h = acc.entropy_nats();

// Naive reference: softmax → -sum p log p.
let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let exps: Vec<f32> = logits.iter().map(|&l| (l - max_l).exp()).collect();
let sum_exp: f32 = exps.iter().sum();
let naive_h: f32 = exps
Expand Down Expand Up @@ -557,7 +557,7 @@ mod tests {
// 32 tokens at +3.0, 32 tokens at -3.0 → two-cluster split.
// Both clusters uniform within themselves; compare online to naive.
let mut logits = vec![3.0_f32; 32];
logits.extend(std::iter::repeat(-3.0_f32).take(32));
logits.extend(std::iter::repeat_n(-3.0_f32, 32));

let mut acc = OnlineSoftmaxEntropy::new();
for &l in &logits {
Expand Down Expand Up @@ -606,7 +606,7 @@ mod tests {
assert!(h >= 0.0, "subnormal entropy must be non-negative, got {h}");

// Verify within tolerance of naive reference.
let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let exps: Vec<f32> = logits.iter().map(|&l| (l - max_l).exp()).collect();
let sum_exp: f32 = exps.iter().sum();
let naive_h: f32 = exps
Expand Down
5 changes: 2 additions & 3 deletions crates/inference/src/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,7 @@ mod tests {
);
assert!(
c.unwrap().is_finite(),
"large identical cosine must be finite, got {:?}",
c
"large identical cosine must be finite, got {c:?}"
);
assert_close(
c.unwrap(),
Expand Down Expand Up @@ -505,7 +504,7 @@ mod tests {
// Paper: mean(cos) = (1.0 + 0.0) / 2 = 0.5
// Pseudocode drift: sum(dot) / sum(norms) = 1.0 / 10001.0 ≈ 0.0001 ← WRONG
let unit_x = vec![1.0_f32, 0.0];
let _unit_y = vec![0.0_f32, 1.0];
let _unit_y = [0.0_f32, 1.0];
let big_x = vec![100.0_f32, 0.0];
let big_y = vec![0.0_f32, 100.0];

Expand Down
Loading