-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontracts.ts
More file actions
83 lines (78 loc) · 2.28 KB
/
contracts.ts
File metadata and controls
83 lines (78 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import { runInNewContext } from "node:vm";
import type { TrialOutput, ContractVerdict } from "./types.js";
import type { ContractConfig, JudgeContractConfig, JudgeProviderConfig, Scenario } from "./config.js";
import { evaluateWithPanel } from "./judges.js";
/**
 * Evaluates a single contract against a trial output by dispatching on the
 * contract's `type` discriminant.
 *
 * @param output - The trial output under evaluation.
 * @param contract - The contract to check; `"code"` contracts carry an
 *   `assert` expression, `"judge"` contracts are handed to the judge panel.
 * @param scenario - The scenario the trial was run for.
 * @param judges - Judge providers; only passed on for `"judge"` contracts.
 * @returns The verdict produced by the matching evaluator.
 */
export async function evaluateContract(
  output: TrialOutput,
  contract: ContractConfig,
  scenario: Scenario,
  judges: readonly JudgeProviderConfig[],
): Promise<ContractVerdict> {
  switch (contract.type) {
    case "judge": {
      return evaluateJudgeContract(output, contract, scenario, judges);
    }
    case "code": {
      const { assert: expression, name } = contract;
      return evaluateCodeContract(output, expression, name, scenario);
    }
  }
}
/** Wall-clock budget, in milliseconds, for evaluating one assertion expression. */
const ASSERT_TIMEOUT_MS = 100;

/**
 * Returns a Proxy over `target` that forwards reads, calls and construction
 * unchanged but refuses every mutating operation.
 *
 * This gives the same "cannot be written to" guarantee as `Object.freeze`
 * without freezing the underlying object itself.
 */
function readOnlyView<T extends object>(target: T): T {
  const refuse = (): boolean => false;
  return new Proxy(target, {
    set: refuse,
    defineProperty: refuse,
    deleteProperty: refuse,
    setPrototypeOf: refuse,
    // Without this trap, Object.preventExtensions/freeze on the view would be
    // forwarded to (and permanently alter) the real built-in.
    preventExtensions: refuse,
  });
}

/**
 * Built-ins exposed to assertion expressions, as read-only views.
 *
 * The real `Math`, `Array`, ... objects are deliberately NOT frozen: they are
 * shared by the whole process, and freezing them would be a permanent global
 * side effect of evaluating a contract.
 */
const SANDBOX_BUILTINS = Object.freeze({
  Math: readOnlyView(Math),
  Array: readOnlyView(Array),
  String: readOnlyView(String),
  RegExp: readOnlyView(RegExp),
  Boolean: readOnlyView(Boolean),
  Number: readOnlyView(Number),
  JSON: readOnlyView(JSON),
});

/**
 * Evaluates a code contract: runs `assertExpr` as a strict-mode JavaScript
 * expression in a fresh `node:vm` context and maps its result to a verdict.
 *
 * The expression can see `output.meta`, `output.parsed` and `scenario` (each a
 * shallowly frozen shallow copy, so nested values are still shared with the
 * caller) plus read-only views of `Math`, `Array`, `String`, `RegExp`,
 * `Boolean`, `Number` and `JSON`.
 *
 * NOTE(security): `node:vm` is not a security boundary, and host objects are
 * reachable from the expression. Only evaluate expressions from trusted
 * configuration.
 *
 * @param output - Trial output whose `meta` and `parsed` fields are exposed.
 * @param assertExpr - JavaScript expression to evaluate.
 * @param contractName - Name copied into the returned verdict.
 * @param scenario - Scenario exposed to the expression as `scenario`.
 * @returns `"pass"` when the expression is truthy, `"fail"` when it is falsy,
 *   and `"error"` (with the message) when evaluation throws, including when
 *   the timeout is exceeded. This function never throws.
 */
function evaluateCodeContract(
  output: TrialOutput,
  assertExpr: string,
  contractName: string,
  scenario: Scenario,
): ContractVerdict {
  try {
    const sandbox = Object.freeze({
      output: Object.freeze({
        meta: Object.freeze({ ...output.meta }),
        parsed: Object.freeze({ ...output.parsed }),
      }),
      scenario: Object.freeze({ ...scenario }),
      ...SANDBOX_BUILTINS,
    });
    // Parenthesised so the source is parsed as a single expression.
    const result: unknown = runInNewContext(
      `"use strict"; (${assertExpr})`,
      sandbox,
      { timeout: ASSERT_TIMEOUT_MS },
    );
    return {
      contractName,
      status: result ? "pass" : "fail",
    };
  } catch (e) {
    // Anything that is not an Error of this realm falls back to String(e).
    const msg = e instanceof Error ? e.message : String(e);
    return {
      contractName,
      status: "error",
      error: `Assertion error: ${msg}`,
    };
  }
}
/**
 * Evaluates a judge contract by delegating to the judge panel and converting
 * its result into a verdict.
 *
 * @param output - The trial output to be judged.
 * @param contract - The judge contract; its `name` labels the verdict.
 * @param scenario - The scenario the trial was run for.
 * @param judges - Judge providers forwarded to the panel.
 * @returns `"pass"` or `"fail"` with the panel's reasoning, or `"error"` with
 *   the failure message when the panel evaluation throws or rejects.
 */
async function evaluateJudgeContract(
  output: TrialOutput,
  contract: JudgeContractConfig,
  scenario: Scenario,
  judges: readonly JudgeProviderConfig[],
): Promise<ContractVerdict> {
  try {
    const panel = await evaluateWithPanel(output, contract, scenario, judges);
    const verdict: ContractVerdict = {
      contractName: contract.name,
      status: panel.pass ? "pass" : "fail",
      reasoning: panel.reasoning,
    };
    return verdict;
  } catch (cause) {
    // Panel failures are reported as an "error" verdict rather than rethrown.
    const detail = cause instanceof Error ? cause.message : String(cause);
    const failure: ContractVerdict = {
      contractName: contract.name,
      status: "error",
      error: `Judge evaluation failed: ${detail}`,
    };
    return failure;
  }
}