Skip to content

Commit 4564ffc

Browse files
committed
v3.0.10: Harden experiment-loop with error handling, input validation, and schema fixes
- Wrap ensureParent/appendJsonl/clearStopFile in try-catch with descriptive errors
- gitAutoCommit returns {ok, error} and warns on stderr; add commitOk to log output
- Validate --timeout-ms bounds (1s-1h); fall back to the default if out of range
- runChecks detects SIGTERM/SIGKILL/ETIMEDOUT; add checksTimedOut to run output
- Validate that --secondary metric values are finite numbers
- Add validateExperimentEntry for per-type required field checks before JSONL write
- Standardize fix-strategy severity enum to title-case (matches shared.schema.json)
- Add allOf/if/then to experiment.schema.json for IDE/CI validation
- 12 new tests (113 total, 0 failures)
1 parent b73627a commit 4564ffc

7 files changed

Lines changed: 366 additions & 18 deletions

File tree

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [3.0.10] - 2026-03-14
9+
10+
### Fixed
11+
- `experiment-loop.cjs`: `ensureParent()` and `appendJsonl()` now have try-catch protection with descriptive error messages — previously, disk-full or permission errors threw cryptic Node.js exceptions
12+
- `experiment-loop.cjs`: `clearStopFile()` handles ENOENT race condition — if stop file is removed between `existsSync` and `unlinkSync`, the error is ignored instead of crashing
13+
- `experiment-loop.cjs`: `gitAutoCommit()` returns `{ ok, error }` and logs warnings to stderr on failure — previously swallowed all errors silently
14+
- `experiment-loop.cjs`: `gitCommitHash()` logs a warning to stderr on failure — still returns `'unknown'` for backward compatibility
15+
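- `experiment-loop.cjs`: `cmdRun()` validates `--timeout-ms` bounds (1s–1h) and falls back to the 120000 ms default when the value is out of range or not a number — previously accepted negative or absurdly large values (zero already fell back to the default)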
16+
- `experiment-loop.cjs`: `runChecks()` detects SIGTERM/SIGKILL/ETIMEDOUT timeouts — previously, a timed-out checks script was reported as `passed: false` with no timeout indicator
17+
- `experiment-loop.cjs`: `cmdLog()` validates that all `--secondary` metric values are finite numbers — previously accepted strings, nulls, and other non-numeric types
18+
- `fix-strategy.schema.json`: `maxSeverity` enum standardized to `["Critical","High","Medium","Low"]` — removed redundant uppercase variants inconsistent with all other schemas
19+
20+
### Added
21+
- `experiment-loop.cjs`: `commitOk` field in `log` command output — surfaces whether the auto-commit succeeded
22+
- `experiment-loop.cjs`: `checksTimedOut` field in `run` command output — indicates whether the checks script hit the 5-minute timeout (`ETIMEDOUT`) or was terminated by SIGTERM/SIGKILL
23+
- `experiment-loop.cjs`: `validateExperimentEntry()` function — validates JSONL entries before writing, enforcing per-type required fields
24+
- `experiment.schema.json`: `allOf` with `if/then` blocks documenting per-type required fields (for IDE/CI validation)
25+
- 12 new tests covering file I/O errors, git commit failures, timeout bounds, secondary metric type validation, checks timeout detection, and entry validation
26+
- Test suite: **113 tests**, 0 failures
27+
828
## [3.0.9] - 2026-03-13
929

1030
### Added

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@codexstar/bug-hunter",
3-
"version": "3.0.9",
3+
"version": "3.0.10",
44
"description": "Adversarial AI bug hunter — multi-agent pipeline finds security vulnerabilities, logic errors, and runtime bugs, then fixes them autonomously. Works with Claude Code, Cursor, Codex CLI, Copilot, Kiro, and more.",
55
"license": "MIT",
66
"main": "bin/bug-hunter",

schemas/experiment.schema.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,19 @@
5757
"minimum": 1
5858
}
5959
},
60-
"additionalProperties": false
60+
"additionalProperties": false,
61+
"allOf": [
62+
{
63+
"if": { "properties": { "type": { "const": "config" } } },
64+
"then": { "required": ["type", "segment", "name", "metric", "maxIterations"] }
65+
},
66+
{
67+
"if": { "properties": { "type": { "const": "result" } } },
68+
"then": { "required": ["type", "segment", "status", "durationMs"] }
69+
},
70+
{
71+
"if": { "properties": { "type": { "const": "resume" } } },
72+
"then": { "required": ["type", "timestamp"] }
73+
}
74+
]
6175
}

schemas/fix-strategy.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
},
8282
"maxSeverity": {
8383
"type": "string",
84-
"enum": ["CRITICAL", "HIGH", "MEDIUM", "LOW", "Critical", "High", "Medium", "Low"]
84+
"enum": ["Critical", "High", "Medium", "Low"]
8585
},
8686
"summary": { "type": "string", "minLength": 1 },
8787
"recommendedAction": { "type": "string", "minLength": 1 },

scripts/experiment-loop.cjs

Lines changed: 115 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ const DEFAULT_CHECKS_SCRIPT = '.bug-hunter/experiment.checks.sh';
3737
const MAX_STDOUT_BYTES = 50000; // Truncate captured output to prevent memory bloat
3838
const DEFAULT_MAX_ITERATIONS = 10; // Hard cap — prevents runaway loops
3939
const MAX_CONSECUTIVE_CRASHES = 3; // Auto-stop after N crashes in a row
40+
const MIN_TIMEOUT_MS = 1000; // 1 second minimum for --timeout-ms
41+
const MAX_TIMEOUT_MS = 3600000; // 1 hour maximum for --timeout-ms
4042

4143
// ---------------------------------------------------------------------------
4244
// Helpers
@@ -51,12 +53,22 @@ function nowIso() {
5153
}
5254

5355
function ensureParent(filePath) {
54-
fs.mkdirSync(path.dirname(filePath), { recursive: true });
56+
try {
57+
fs.mkdirSync(path.dirname(filePath), { recursive: true });
58+
} catch (err) {
59+
const msg = err instanceof Error ? err.message : String(err);
60+
throw new Error(`Failed to create parent directory for ${filePath}: ${msg}`);
61+
}
5562
}
5663

5764
function appendJsonl(filePath, obj) {
5865
ensureParent(filePath);
59-
fs.appendFileSync(filePath, `${JSON.stringify(obj)}\n`, 'utf8');
66+
try {
67+
fs.appendFileSync(filePath, `${JSON.stringify(obj)}\n`, 'utf8');
68+
} catch (err) {
69+
const msg = err instanceof Error ? err.message : String(err);
70+
throw new Error(`Failed to append to ${filePath}: ${msg}`);
71+
}
6072
}
6173

6274
function readJsonlLines(filePath) {
@@ -217,8 +229,16 @@ function isStopRequested(stopFile) {
217229
}
218230

219231
function clearStopFile(stopFile) {
220-
if (fs.existsSync(stopFile)) {
221-
fs.unlinkSync(stopFile);
232+
try {
233+
if (fs.existsSync(stopFile)) {
234+
fs.unlinkSync(stopFile);
235+
}
236+
} catch (err) {
237+
// Ignore ENOENT — file was already removed (race condition)
238+
if (err.code !== 'ENOENT') {
239+
const msg = err instanceof Error ? err.message : String(err);
240+
throw new Error(`Failed to remove stop file ${stopFile}: ${msg}`);
241+
}
222242
}
223243
}
224244

@@ -239,7 +259,12 @@ function canResume(state) {
239259
}
240260

241261
function recordResume(logPath) {
242-
appendJsonl(logPath, { type: 'resume', timestamp: nowMs() });
262+
const entry = { type: 'resume', timestamp: nowMs() };
263+
const validation = validateExperimentEntry(entry);
264+
if (!validation.ok) {
265+
throw new Error(`Invalid resume entry: ${validation.errors.join('; ')}`);
266+
}
267+
appendJsonl(logPath, entry);
243268
}
244269

245270
// ---------------------------------------------------------------------------
@@ -272,6 +297,44 @@ function validateSecondaryMetrics(state, secondaryMetrics, force) {
272297
return { ok: errors.length === 0, errors };
273298
}
274299

300+
// ---------------------------------------------------------------------------
301+
// Entry validation before JSONL write
302+
// ---------------------------------------------------------------------------
303+
304+
function validateExperimentEntry(entry) {
305+
const errors = [];
306+
if (!entry || typeof entry !== 'object') {
307+
return { ok: false, errors: ['Entry must be an object'] };
308+
}
309+
if (!['config', 'result', 'resume'].includes(entry.type)) {
310+
errors.push(`Invalid type: ${entry.type}`);
311+
return { ok: false, errors };
312+
}
313+
if (entry.type === 'config') {
314+
if (typeof entry.segment !== 'number') errors.push('config entry requires segment (number)');
315+
if (!entry.name) errors.push('config entry requires name');
316+
if (!entry.metric || !entry.metric.name || !entry.metric.direction) {
317+
errors.push('config entry requires metric with name and direction');
318+
}
319+
if (!Number.isInteger(entry.maxIterations) || entry.maxIterations < 1) {
320+
errors.push('config entry requires maxIterations (positive integer)');
321+
}
322+
}
323+
if (entry.type === 'result') {
324+
if (typeof entry.segment !== 'number') errors.push('result entry requires segment (number)');
325+
if (!['keep', 'discard', 'crash', 'checks_failed'].includes(entry.status)) {
326+
errors.push('result entry requires valid status');
327+
}
328+
if (typeof entry.durationMs !== 'number' || entry.durationMs < 0) {
329+
errors.push('result entry requires durationMs (non-negative number)');
330+
}
331+
}
332+
if (entry.type === 'resume') {
333+
if (typeof entry.timestamp !== 'number') errors.push('resume entry requires timestamp (number)');
334+
}
335+
return { ok: errors.length === 0, errors };
336+
}
337+
275338
// ---------------------------------------------------------------------------
276339
// Run experiment
277340
//
@@ -324,19 +387,23 @@ function runExperiment(command, timeoutMs) {
324387

325388
function runChecks(checksScript) {
326389
if (!fs.existsSync(checksScript)) {
327-
return { passed: true, skipped: true, stdout: '', stderr: '' };
390+
return { passed: true, skipped: true, stdout: '', stderr: '', timedOut: false };
328391
}
329392
const result = childProcess.spawnSync('bash', [checksScript], {
330393
encoding: 'utf8',
331394
timeout: 300000, // 5 min max for checks
332395
stdio: ['pipe', 'pipe', 'pipe'],
333396
env: { ...process.env, BUG_HUNTER_EXPERIMENT: '1' }
334397
});
398+
const timedOut = result.signal === 'SIGTERM'
399+
|| result.signal === 'SIGKILL'
400+
|| (result.error && result.error.code === 'ETIMEDOUT');
335401
return {
336-
passed: result.status === 0,
402+
passed: result.status === 0 && !timedOut,
337403
skipped: false,
338404
stdout: truncateOutput(result.stdout || '', MAX_STDOUT_BYTES),
339-
stderr: truncateOutput(result.stderr || '', MAX_STDOUT_BYTES)
405+
stderr: truncateOutput(result.stderr || '', MAX_STDOUT_BYTES),
406+
timedOut
340407
};
341408
}
342409

@@ -369,7 +436,9 @@ function gitCommitHash() {
369436
timeout: 10000
370437
});
371438
return (result.stdout || '').trim() || 'unknown';
372-
} catch {
439+
} catch (err) {
440+
const msg = err instanceof Error ? err.message : String(err);
441+
console.error(`Warning: git rev-parse failed: ${msg}`);
373442
return 'unknown';
374443
}
375444
}
@@ -378,12 +447,20 @@ function gitAutoCommit(description) {
378447
try {
379448
childProcess.spawnSync('git', ['add', '-A'], { encoding: 'utf8', timeout: 30000 });
380449
const msg = `experiment: ${description}\n\nResult: keep`;
381-
childProcess.spawnSync('git', ['commit', '-m', msg], {
450+
const result = childProcess.spawnSync('git', ['commit', '-m', msg], {
382451
encoding: 'utf8',
383452
timeout: 30000
384453
});
385-
} catch {
386-
// Non-fatal: commit failure doesn't break the experiment loop
454+
if (result.status !== 0) {
455+
const stderr = (result.stderr || '').trim();
456+
console.error(`Warning: git commit exited ${result.status}: ${stderr}`);
457+
return { ok: false, error: stderr || `exit code ${result.status}` };
458+
}
459+
return { ok: true, error: '' };
460+
} catch (err) {
461+
const msg = err instanceof Error ? err.message : String(err);
462+
console.error(`Warning: git auto-commit failed: ${msg}`);
463+
return { ok: false, error: msg };
387464
}
388465
}
389466

@@ -465,6 +542,11 @@ function cmdInit(args) {
465542
}
466543
};
467544

545+
const configValidation = validateExperimentEntry(configEntry);
546+
if (!configValidation.ok) {
547+
console.error(`Invalid config entry: ${configValidation.errors.join('; ')}`);
548+
process.exit(1);
549+
}
468550
appendJsonl(logPath, configEntry);
469551
console.log(JSON.stringify({
470552
ok: true,
@@ -498,7 +580,10 @@ function cmdRun(args) {
498580
}
499581

500582
const stopFile = named['stop-file'] || DEFAULT_STOP_FILE;
501-
const timeoutMs = Number.parseInt(named['timeout-ms'] || '', 10) || 120000;
583+
const rawTimeout = Number.parseInt(named['timeout-ms'] || '', 10);
584+
const timeoutMs = Number.isFinite(rawTimeout) && rawTimeout >= MIN_TIMEOUT_MS && rawTimeout <= MAX_TIMEOUT_MS
585+
? rawTimeout
586+
: 120000;
502587
const checksScript = named['checks-script'] || DEFAULT_CHECKS_SCRIPT;
503588

504589
// GUARDRAIL: Check stop file before every run
@@ -537,6 +622,7 @@ function cmdRun(args) {
537622
passed: runResult.passed,
538623
checksPassed: checksResult.passed,
539624
checksSkipped: checksResult.skipped,
625+
checksTimedOut: checksResult.timedOut || false,
540626
durationMs: runResult.durationMs,
541627
exitCode: runResult.exitCode,
542628
timedOut: runResult.timedOut,
@@ -599,6 +685,13 @@ function cmdLog(args) {
599685
console.error('Invalid --secondary JSON');
600686
process.exit(1);
601687
}
688+
// Validate all secondary metric values are finite numbers
689+
for (const [key, val] of Object.entries(secondaryMetrics)) {
690+
if (typeof val !== 'number' || !Number.isFinite(val)) {
691+
console.error(`Invalid secondary metric value for "${key}": expected a number, got ${typeof val}`);
692+
process.exit(1);
693+
}
694+
}
602695
}
603696

604697
const state = reconstructState(logPath);
@@ -620,8 +713,10 @@ function cmdLog(args) {
620713

621714
// Auto-commit on keep (pi-autoresearch pattern)
622715
let commit = 'unknown';
716+
let commitOk = true;
623717
if (status === 'keep' && autoCommit) {
624-
gitAutoCommit(description || `experiment #${state.totalRuns + 1}`);
718+
const commitResult = gitAutoCommit(description || `experiment #${state.totalRuns + 1}`);
719+
commitOk = commitResult.ok;
625720
commit = gitCommitHash();
626721
} else {
627722
commit = gitCommitHash();
@@ -651,6 +746,11 @@ function cmdLog(args) {
651746
durationMs: Number.isFinite(durationMs) && durationMs >= 0 ? durationMs : 0
652747
};
653748

749+
const resultValidation = validateExperimentEntry(resultEntry);
750+
if (!resultValidation.ok) {
751+
console.error(`Invalid result entry: ${resultValidation.errors.join('; ')}`);
752+
process.exit(1);
753+
}
654754
appendJsonl(logPath, resultEntry);
655755

656756
// Determine if this is the new best
@@ -674,6 +774,7 @@ function cmdLog(args) {
674774
delta,
675775
isBest,
676776
commit,
777+
commitOk,
677778
kept: state.kept + (status === 'keep' ? 1 : 0),
678779
discarded: state.discarded + (status === 'discard' ? 1 : 0),
679780
crashed: state.crashed + (status === 'crash' ? 1 : 0),

0 commit comments

Comments
 (0)