fix: add behavioral depth criteria to task acceptance and parity checks (#225)

jafreck · web-flow · commit e7f4e7efdb1e · 2026-03-27T10:18:40.000-07:00
* fix: add behavioral depth criteria to task acceptance and parity checks

The task graph builder was generating only structural acceptance criteria
(symbols exist, signatures match, compiles) while the parity verifier
enforced full behavioral equivalence. This gap caused the code-migrator
to produce hollow implementations that compiled but didn't actually
perform the intended computation — leading to expensive retry loops.

Both buildAcceptanceCriteria() and buildParityChecks() now generate
behavioral criteria for every task containing functions or methods:

Acceptance criteria:
- Full implementation required (no stubs/TODOs/placeholders)
- Behavioral equivalence using idiomatic target-language patterns
- Implementation depth must match source complexity

Parity checks:
- All source code paths reachable in target
- No hollow implementations (input-dependent output required)
- Internal call chains wired end-to-end

These criteria apply uniformly regardless of task size — a 30-line
hash function is held to the same behavioral bar as a 900-line
compressor. Criteria use behavioral language (same observable outputs)
rather than structural language, so idiomatic rewrites are not
penalized.

* fix: clarify guidance to ban unsafe and prioritize safety over performance
diff --git a/src/core/task-graph-builder.ts b/src/core/task-graph-builder.ts
@@ -1690,6 +1690,27 @@ function buildAcceptanceCriteria(cluster: Cluster): string[] {
   }
   criteria.push('Call-site signatures match upstream dependency contracts');
   criteria.push('Target code compiles without type errors');
+
+  // Behavioral depth: every function body must contain a real implementation,
+  // not stubs, pass-throughs, or hollow wrappers.  The target language idioms
+  // may differ from the source (Result vs error code, Vec vs linked list, etc.)
+  // but the *computational depth* must match: if the source performs a non-trivial
+  // transformation (compression, hashing, encoding, decoding, parsing, etc.),
+  // the target must perform an equivalently non-trivial transformation.
+  const hasFunctions = cluster.symbols.some(
+    s => s.kind === 'function' || s.kind === 'method',
+  );
+  if (hasFunctions) {
+    criteria.push(
+      'Every function body is fully implemented — no stubs, TODOs, unimplemented!() macros, or placeholder logic',
+    );
+    criteria.push(
+      'Behavioral equivalence: for all reachable inputs, the migrated code produces the same observable outputs, side effects, and error conditions as the source — using idiomatic target-language patterns (different types, signatures, and error models are expected)',
+    );
+    criteria.push(
+      'Implementation depth matches source complexity — if the source performs a non-trivial transformation (compression, hashing, encryption, codec logic, etc.), the target must perform an equivalently non-trivial computation, not a pass-through or synthetic wrapper',
+    );
+  }
   return criteria;
 }
 
@@ -1700,5 +1721,23 @@ function buildParityChecks(cluster: Cluster): string[] {
   if (cluster.symbols.some(s => s.kind === 'type' || s.kind === 'class' || s.kind === 'struct')) {
     checks.push('Type definitions preserve public field names and types');
   }
+
+  // Behavioral parity checks — enforce depth without enforcing structural
+  // similarity.  These checks tell the parity verifier (and the migrator)
+  // what "correct" means for this task: same behavior, not same shape.
+  const hasFunctions = cluster.symbols.some(
+    s => s.kind === 'function' || s.kind === 'method',
+  );
+  if (hasFunctions) {
+    checks.push(
+      'All source code paths and branches are reachable in the target — no dead dispatch or unreachable algorithm branches',
+    );
+    checks.push(
+      'No hollow implementations: functions must produce non-trivial, input-dependent output matching the source semantics (not zeros, defaults, or pass-through copies)',
+    );
+    checks.push(
+      'Internal call chains are wired end-to-end — public entry points transitively invoke the same algorithmic stages as the source (e.g., a compressor must call encoding, matching, and entropy stages, not return the input with framing)',
+    );
+  }
   return checks;
 }
diff --git a/tests/core/task-graph-builder.test.ts b/tests/core/task-graph-builder.test.ts
@@ -846,6 +846,92 @@ describe('buildTaskGraph', () => {
       );
     }
   });
+
+  it('should include behavioral acceptance criteria for tasks with functions', async () => {
+    const dbPath = join(tempDir, 'kb.db');
+    const db = createTestDb(dbPath);
+    const f = insertFile(db, 'src/codec.c');
+    insertSymbol(db, f, 'compress', 'function', 1, 80);
+    db.close();
+
+    const result = await buildTaskGraph({ ...DEFAULT_OPTIONS, kbDbPath: dbPath });
+    const task = result.tasks.find(t =>
+      t.symbols?.some(s => s.name === 'compress'),
+    );
+    expect(task).toBeDefined();
+    expect(task!.acceptanceCriteria).toEqual(
+      expect.arrayContaining([
+        expect.stringMatching(/fully implemented/i),
+        expect.stringMatching(/behavioral equivalence/i),
+        expect.stringMatching(/implementation depth/i),
+      ]),
+    );
+  });
+
+  it('should include behavioral parity checks for tasks with functions', async () => {
+    const dbPath = join(tempDir, 'kb.db');
+    const db = createTestDb(dbPath);
+    const f = insertFile(db, 'src/hash.c');
+    insertSymbol(db, f, 'xxh64', 'function', 1, 80);
+    db.close();
+
+    const result = await buildTaskGraph({ ...DEFAULT_OPTIONS, kbDbPath: dbPath });
+    const task = result.tasks.find(t =>
+      t.symbols?.some(s => s.name === 'xxh64'),
+    );
+    expect(task).toBeDefined();
+    expect(task!.parityChecks).toEqual(
+      expect.arrayContaining([
+        expect.stringMatching(/code paths.*reachable/i),
+        expect.stringMatching(/hollow implementations/i),
+        expect.stringMatching(/call chains.*wired/i),
+      ]),
+    );
+  });
+
+  it('should not include behavioral criteria for type-only tasks', async () => {
+    const dbPath = join(tempDir, 'kb.db');
+    const db = createTestDb(dbPath);
+    const f = insertFile(db, 'src/types.c');
+    insertSymbol(db, f, 'Options', 'struct', 1, 40);
+    insertSymbol(db, f, 'Mode', 'enum', 41, 60);
+    db.close();
+
+    const result = await buildTaskGraph({ ...DEFAULT_OPTIONS, kbDbPath: dbPath });
+    const task = result.tasks.find(t =>
+      t.symbols?.every(s => s.kind === 'struct' || s.kind === 'enum'),
+    );
+    expect(task).toBeDefined();
+    // Should NOT have function-specific behavioral criteria
+    expect(task!.acceptanceCriteria.join(' ')).not.toMatch(/fully implemented/i);
+    expect(task!.parityChecks.join(' ')).not.toMatch(/hollow implementations/i);
+  });
+
+  it('should apply behavioral criteria uniformly regardless of task size', async () => {
+    const dbPath = join(tempDir, 'kb.db');
+    const db = createTestDb(dbPath);
+    const f = insertFile(db, 'src/util.c');
+    // Small function — above micro-elision threshold but still simple
+    insertSymbol(db, f, 'crc32', 'function', 1, 40);
+    db.close();
+
+    const result = await buildTaskGraph({ ...DEFAULT_OPTIONS, kbDbPath: dbPath });
+    const task = result.tasks.find(t =>
+      t.symbols?.some(s => s.name === 'crc32'),
+    );
+    expect(task).toBeDefined();
+    // Even a small function gets the full behavioral criteria
+    expect(task!.acceptanceCriteria).toEqual(
+      expect.arrayContaining([
+        expect.stringMatching(/implementation depth/i),
+      ]),
+    );
+    expect(task!.parityChecks).toEqual(
+      expect.arrayContaining([
+        expect.stringMatching(/call chains.*wired/i),
+      ]),
+    );
+  });
 });
 
 // ─── buildDependencySummary Tests ───────────────────────────────────────────
diff --git a/tests/fixtures/zstd-c-project/migration.config.json b/tests/fixtures/zstd-c-project/migration.config.json
@@ -3,8 +3,9 @@
   "guidance": [
     "Do NOT use any existing Rust crates that wrap the C implementation (e.g. zstd, zstd-safe, zstd-sys, lz4-sys). Write a pure native Rust port of the C source code.",
     "Do NOT use FFI, bindgen, or any C interop. All code must be idiomatic safe Rust.",
-    "Preserve the original algorithmic structure so that the Rust port is auditable against the C reference.",
-    "Performance parity with the C implementation is required. Retain performance-critical optimizations such as SIMD intrinsics using minimal unsafe Rust where necessary."
+    "All code must be safe Rust. Do NOT use `unsafe` blocks, `unsafe fn`, raw pointers (`*const`, `*mut`), or `core::ffi::c_void`. Use slices (`&[u8]`, `&mut [u8]`), Vec, iterators, and Rust's standard byte-order methods (e.g. `u32::from_le_bytes`, `to_be_bytes`) instead of pointer casts and manual memory access. If a C pattern seems to require unsafe, find the safe Rust equivalent — it almost always exists.",
+    "Preserve the algorithmic logic (same algorithmic steps, same data flow) so the port is auditable against the C reference — but use idiomatic Rust types and APIs. The algorithm should be recognizable, not the function signatures or pointer patterns.",
+    "Prefer correctness and safety over micro-optimization. It is acceptable for the Rust port to be slightly slower than the C original if the alternative is unsafe code. Do not use unsafe for performance reasons."
   ],
   "source": {
     "path": "./zstd-src/zstd-1.5.7",