diff --git a/.githooks/pre-commit b/.githooks/pre-commit index ada5396f5..bad8594b5 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -11,7 +11,7 @@ if [ $? -ne 0 ]; then fi # Clippy -cargo clippy --workspace -- -D warnings +cargo clippy --workspace --all-targets -- -D warnings if [ $? -ne 0 ]; then echo "Clippy check failed. Fix warnings before committing." exit 1 diff --git a/.github/workflows/bench-regression.yml b/.github/workflows/bench-regression.yml index 6d109aa6d..ce7151569 100644 --- a/.github/workflows/bench-regression.yml +++ b/.github/workflows/bench-regression.yml @@ -77,17 +77,13 @@ jobs: cargo bench -p lattice-inference --bench elementwise_cpu_bench --no-run cargo bench -p lattice-embed --bench simd --no-run - - name: Run benches against baseline - if: steps.fetch_baseline.outputs.have_baseline == 'true' - run: | - cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --baseline base --noplot - cargo bench -p lattice-embed --bench simd -- --baseline base --noplot - - - name: Run benches without baseline (seed) - if: steps.fetch_baseline.outputs.have_baseline == 'false' + - name: Run benches run: | - cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --save-baseline base --noplot - cargo bench -p lattice-embed --bench simd -- --save-baseline base --noplot + # --save-baseline saves new data AND compares against existing baseline + # if present. Unlike --baseline, it doesn't panic when a bench group + # has no prior baseline (e.g., newly added bench groups). + cargo bench -p lattice-inference --bench elementwise_cpu_bench -- --save-baseline base --noplot --quick + cargo bench -p lattice-embed --bench simd -- --save-baseline base --noplot --quick - name: Apply gate id: gate @@ -98,15 +94,14 @@ jobs: && echo "gate=pass" >> "$GITHUB_OUTPUT" \ || echo "gate=fail" >> "$GITHUB_OUTPUT" - - name: Note seed run + - name: Note no baseline if: steps.fetch_baseline.outputs.have_baseline == 'false' run: | - mkdir -p . - cat > report-${{ matrix.arch }}.md < report-${{ matrix.arch }}.md <<'EOF' + ### `${{ matrix.arch }}` — no baseline available - The \`perf-baselines\` branch has no data for this arch yet. Run - \`bench-update.yml\` on \`main\` to seed it. This PR is not gated. + The `perf-baselines` branch has no data for this arch yet. Run + `bench-update.yml` on `main` to seed it. This PR is not gated. EOF - name: Upload report diff --git a/crates/inference/benches/metrics_bench.rs b/crates/inference/benches/metrics_bench.rs index 8e69a788d..8bb158dba 100644 --- a/crates/inference/benches/metrics_bench.rs +++ b/crates/inference/benches/metrics_bench.rs @@ -60,7 +60,7 @@ fn naive_entropy_nats(logits: &[f32]) -> f32 { if logits.len() < 2 { return 0.0; } - let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max); let mut sum_exp = 0.0_f32; let mut exps: Vec = logits .iter() diff --git a/crates/inference/src/attention/mod.rs b/crates/inference/src/attention/mod.rs index 76feaade8..a1557b588 100644 --- a/crates/inference/src/attention/mod.rs +++ b/crates/inference/src/attention/mod.rs @@ -448,7 +448,7 @@ mod attention_kind_tests { head_dim: 128, }; let kind = AttentionKind::Gqa(cfg); - let cloned = kind.clone(); + let cloned = kind; assert_eq!(cloned.name(), "gqa"); if let AttentionKind::Gqa(c) = cloned { assert_eq!(c.num_heads, 32); diff --git a/crates/inference/src/kv_cache/flat.rs b/crates/inference/src/kv_cache/flat.rs index 99f40c7ba..459c8c0c9 100644 --- a/crates/inference/src/kv_cache/flat.rs +++ b/crates/inference/src/kv_cache/flat.rs @@ -675,10 +675,10 @@ mod tests { }; let kv_dim = 2 * 4; // 8 // f16: 2 * 1 * 16 * 8 * 2 = 512 bytes - let expected_f16 = 2 * 1 * 16 * kv_dim * std::mem::size_of::(); + let expected_f16 = 2 * 16 * kv_dim * std::mem::size_of::(); assert_eq!(config.total_bytes(), expected_f16); // Would have been 1024 with f32 - let would_be_f32 = 2 * 1 * 16 * kv_dim * std::mem::size_of::(); + let would_be_f32 = 2 * 16 * kv_dim * std::mem::size_of::(); assert_eq!(config.total_bytes() * 2, would_be_f32); } @@ -947,8 +947,7 @@ mod tests { // Measured relative error must be < 0.1% (0.001) assert!( max_rel_kv < 0.001, - "max relative error for KV in [-10,10] is {:.4e}, expected < 0.001", - max_rel_kv + "max relative error for KV in [-10,10] is {max_rel_kv:.4e}, expected < 0.001" ); } @@ -1023,7 +1022,7 @@ mod tests { // Phase 2: stable softmax let max_s = scores[..kv_seq_len] .iter() - .cloned() + .copied() .fold(f32::NEG_INFINITY, f32::max); let sum: f32 = scores[..kv_seq_len] .iter_mut() @@ -1227,31 +1226,27 @@ mod tests { let top1_rate = top1_match_count as f32 / total_cases as f32; eprintln!( - "\n=== Tensor Oracle Summary ===\n logit_max_abs_diff = {:.4e} (gate: < 0.02)\n top1_match_rate = {:.4} (gate: >= 0.95)\n nan_count = {}\n max_synth_nll_delta= {:.4e} (gate: < 0.01)", - global_max_logit_diff, top1_rate, nan_count, max_synth_nll_delta + "\n=== Tensor Oracle Summary ===\n logit_max_abs_diff = {global_max_logit_diff:.4e} (gate: < 0.02)\n top1_match_rate = {top1_rate:.4} (gate: >= 0.95)\n nan_count = {nan_count}\n max_synth_nll_delta= {max_synth_nll_delta:.4e} (gate: < 0.01)" ); assert_eq!(nan_count, 0, "f16 KV dequant introduced NaN in logits"); assert!( global_max_logit_diff < 0.02, - "logit_max_abs_diff {:.4e} >= 0.02 gate", - global_max_logit_diff + "logit_max_abs_diff {global_max_logit_diff:.4e} >= 0.02 gate" ); assert!( top1_rate >= 0.95, - "top1_match_rate {:.4} < 0.95 gate", - top1_rate + "top1_match_rate {top1_rate:.4} < 0.95 gate" ); assert!( max_synth_nll_delta < 0.01, - "max synthetic NLL delta {:.4e} >= 0.01", - max_synth_nll_delta + "max synthetic NLL delta {max_synth_nll_delta:.4e} >= 0.01" ); } /// Compute log softmax probability for target token (for synthetic NLL). fn softmax_log_prob(logits: &[f32], target: usize) -> f32 { - let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max); let sum: f32 = logits.iter().map(|&l| (l - max_l).exp()).sum(); let log_sum = sum.ln(); (logits[target] - max_l) - log_sum diff --git a/crates/inference/src/metrics.rs b/crates/inference/src/metrics.rs index 11cd36afa..81eeb23e3 100644 --- a/crates/inference/src/metrics.rs +++ b/crates/inference/src/metrics.rs @@ -343,7 +343,7 @@ mod tests { let online_h = acc.entropy_nats(); // Naive reference: softmax → -sum p log p. - let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max); let exps: Vec = logits.iter().map(|&l| (l - max_l).exp()).collect(); let sum_exp: f32 = exps.iter().sum(); let naive_h: f32 = exps @@ -557,7 +557,7 @@ mod tests { // 32 tokens at +3.0, 32 tokens at -3.0 → two-cluster split. // Both clusters uniform within themselves; compare online to naive. let mut logits = vec![3.0_f32; 32]; - logits.extend(std::iter::repeat(-3.0_f32).take(32)); + logits.extend(std::iter::repeat_n(-3.0_f32, 32)); let mut acc = OnlineSoftmaxEntropy::new(); for &l in &logits { @@ -606,7 +606,7 @@ mod tests { assert!(h >= 0.0, "subnormal entropy must be non-negative, got {h}"); // Verify within tolerance of naive reference. - let max_l = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + let max_l = logits.iter().copied().fold(f32::NEG_INFINITY, f32::max); let exps: Vec = logits.iter().map(|&l| (l - max_l).exp()).collect(); let sum_exp: f32 = exps.iter().sum(); let naive_h: f32 = exps diff --git a/crates/inference/src/pruning.rs b/crates/inference/src/pruning.rs index fcba46cbb..fcce0aa18 100644 --- a/crates/inference/src/pruning.rs +++ b/crates/inference/src/pruning.rs @@ -309,8 +309,7 @@ mod tests { ); assert!( c.unwrap().is_finite(), - "large identical cosine must be finite, got {:?}", - c + "large identical cosine must be finite, got {c:?}" ); assert_close( c.unwrap(), @@ -505,7 +504,7 @@ mod tests { // Paper: mean(cos) = (1.0 + 0.0) / 2 = 0.5 // Pseudocode drift: sum(dot) / sum(norms) = 0.0 / 10001.0 ≈ 0.0 ← WRONG let unit_x = vec![1.0_f32, 0.0]; - let _unit_y = vec![0.0_f32, 1.0]; + let _unit_y = [0.0_f32, 1.0]; let big_x = vec![100.0_f32, 0.0]; let big_y = vec![0.0_f32, 100.0];