Skip to content

Commit bd97765

Browse files
committed
Update judge to gpt 5.4
1 parent acc3f2e commit bd97765

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

evals/buffbench/judge.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ Provide detailed analysis, strengths, weaknesses, and numerical scores.`,
123123
const judgeAgents: Record<string, AgentDefinition> = {
124124
'judge-gpt': {
125125
id: 'judge-gpt',
126-
model: 'openai/gpt-5.1',
126+
model: 'openai/gpt-5.4',
127127
...judgeAgentBase,
128128
},
129129
'judge-gemini': {
@@ -133,7 +133,7 @@ const judgeAgents: Record<string, AgentDefinition> = {
133133
},
134134
'judge-sonnet': {
135135
id: 'judge-claude',
136-
model: 'anthropic/claude-sonnet-4.5',
136+
model: 'anthropic/claude-sonnet-4.6',
137137
...judgeAgentBase,
138138
},
139139
}

evals/buffbench/main-nightly.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ async function main() {
1717
const results = await runBuffBench({
1818
evalDataPaths: [ path.join(__dirname, 'eval-codebuff.json')],
1919
agents: ['base2-free'],
20-
taskConcurrency: 6,
20+
taskConcurrency: 5,
2121
saveTraces,
2222
})
2323

evals/buffbench/main.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ async function main() {
1111
await runBuffBench({
1212
evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')],
1313
agents: ['base2-free-evals'],
14-
taskConcurrency: 10,
14+
taskConcurrency: 6,
1515
saveTraces,
1616
})
1717

0 commit comments

Comments
 (0)