Skip to content

Commit a0d327a

Browse files
ibolmo and daviddkkim authored
add package manager option (#75)
Co-authored-by: Olmo Maldonado <olmo@braintrust.dev> Co-authored-by: david kim <david.kim@richmond.edu>
1 parent 7051894 commit a0d327a

13 files changed

Lines changed: 225 additions & 69 deletions

File tree

.github/workflows/eval-py-uv.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
name: Run Python evals
2+
3+
on:
4+
push:
5+
# files:
6+
# - 'test-eval/**'
7+
8+
permissions:
9+
pull-requests: write
10+
contents: read
11+
12+
jobs:
13+
eval:
14+
name: Run Python evals
15+
runs-on: ubuntu-latest
16+
17+
steps:
18+
- name: Checkout
19+
id: checkout
20+
uses: actions/checkout@v4
21+
with:
22+
fetch-depth: 0
23+
submodules: "recursive"
24+
25+
- name: Install uv
26+
uses: astral-sh/setup-uv@v5
27+
28+
- name: Set up Python
29+
uses: actions/setup-python@v4
30+
with:
31+
python-version: "3.12" # TODO: Matrix test different versions
32+
33+
- name: Install dependencies
34+
run: |
35+
cd test-eval-py
36+
uv lock --check
37+
uv sync --no-dev
38+
39+
- name: Run Evals
40+
uses: ./
41+
with:
42+
api_key: ${{ secrets.BRAINTRUST_API_KEY }}
43+
root: test-eval-py
44+
runtime: python
45+
package_manager: uv
46+
47+
# - name: Start terminal session
48+
# uses: mxschmitt/action-tmate@v3
49+
# with:
50+
# limit-access-to-actor: true

README.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ You can configure the following variables:
2222
- `paths`: Specific paths, relative to the root, containing evals you'd like to
2323
run.
2424
- `runtime`: Either `node` or `python`
25+
- `package_manager`: Either `npm` or `pnpm` for a `node` runtime, or
26+
`pip` or `uv` for a `python` runtime.
2527
- `use_proxy`: Either `true` or `false`. If set, `OPENAI_BASE_URL` will be set
2628
to `https://braintrustproxy.com/v1`, which will automatically cache repetitive
2729
LLM calls and run your evals faster. Defaults to `true`.
28-
- `terminate_on_failure`: Either `true` or `false`. If set to `true`, the evaluation
29-
process will stop when an error occurs. Defaults to `false`.
30+
- `terminate_on_failure`: Either `true` or `false`. If set to `true`, the
31+
evaluation process will stop when an error occurs. Defaults to `false`.
3032

3133
## Full example
3234

@@ -82,9 +84,10 @@ jobs:
8284

8385
To see examples of fully configured templates, see the `examples` directory:
8486

85-
- [`node with npm`](examples/npm.yml)
86-
- [`node with pnpm`](examples/pnpm.yml)
87-
- [`python`](examples/python.yml)
87+
- [`node with npm`](examples/node/npm.yml)
88+
- [`node with pnpm`](examples/node/pnpm.yml)
89+
- [`python with pip`](examples/python/pip.yml)
90+
- [`python with uv`](examples/python/uv.yml)
8891

8992
## How it works
9093

action.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ inputs:
2323
runtime:
2424
description: "The runtime to use for evals. Valid values: node, python."
2525
required: true
26+
package_manager:
27+
description:
28+
"The package manager to use for evals. Valid values: npm, pnpm, pip,
29+
or uv depending on the runtime."
30+
required: false
31+
default: ""
2632
use_proxy:
2733
description:
2834
"Whether to use the Braintrust proxy (to cache LLM calls). Set to 'true'
@@ -31,8 +37,8 @@ inputs:
3137
default: "true"
3238
terminate_on_failure:
3339
description:
34-
"Whether to terminate the evaluation process when an error occurs. Set to 'true'
35-
or 'false'."
40+
"Whether to terminate the evaluation process when an error occurs. Set to
41+
'true' or 'false'."
3642
required: false
3743
default: "false"
3844
github_token:

eval/dist/index.js

Lines changed: 29 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

eval/dist/index.js.map

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

eval/src/braintrust.ts

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ function snakeToCamelCase(str: string) {
1717
}
1818

1919
async function runCommand(command: string, onSummary: OnSummaryFn) {
20+
core.info(`> $ ${command}`);
2021
return new Promise((resolve, reject) => {
2122
const process = execSync(command);
2223

@@ -76,18 +77,40 @@ export async function runEval(args: Params, onSummary: OnSummaryFn) {
7677
// Change working directory
7778
process.chdir(path.resolve(root));
7879

79-
let command: string;
8080
const terminateFlag = terminate_on_failure ? "--terminate-on-failure" : "";
8181

82-
switch (args.runtime) {
83-
case "node":
84-
command = `npx braintrust eval --jsonl ${terminateFlag} ${paths}`;
85-
break;
86-
case "python":
87-
command = `braintrust eval --jsonl ${terminateFlag} ${paths}`;
88-
break;
89-
default:
90-
throw new Error(`Unsupported runtime: ${args.runtime}`);
91-
}
82+
const baseCommand = (() => {
83+
switch (args.runtime.toLowerCase().trim()) {
84+
case "node":
85+
switch (args.package_manager) {
86+
case "":
87+
case "npm":
88+
return "npx braintrust";
89+
case "pnpm":
90+
return "pnpm dlx braintrust";
91+
default:
92+
throw new Error(
93+
`Unsupported package manager: ${args.package_manager}`,
94+
);
95+
}
96+
case "python":
97+
switch ((args.package_manager || "").toLowerCase().trim()) {
98+
case "":
99+
case "pip":
100+
return `braintrust`;
101+
case "uv":
102+
return `uv run braintrust`;
103+
default:
104+
throw new Error(
105+
`Unsupported package manager: ${args.package_manager}`,
106+
);
107+
}
108+
default:
109+
throw new Error(`Unsupported runtime: ${args.runtime}`);
110+
}
111+
})();
112+
113+
const command = `${baseCommand} eval --jsonl ${terminateFlag} ${paths}`;
114+
92115
await runCommand(command, onSummary);
93116
}

eval/src/main.ts

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,49 @@ import { ExperimentSummary } from "braintrust";
66
import { capitalize } from "@braintrust/core";
77
import { z } from "zod";
88

9-
const paramsSchema = z.strictObject({
10-
api_key: z.string(),
11-
root: z.string(),
12-
paths: z.string(),
13-
runtime: z.enum(["node", "python"]),
14-
use_proxy: z
15-
.string()
16-
.toLowerCase()
17-
.transform(x => JSON.parse(x))
18-
.pipe(z.boolean()),
19-
terminate_on_failure: z
20-
.string()
21-
.toLowerCase()
22-
.transform(x => JSON.parse(x))
23-
.pipe(z.boolean())
24-
.default("false"),
25-
});
9+
const nodeManagers = ["npm", "pnpm"];
10+
const pythonManagers = ["pip", "uv"];
11+
12+
const paramsSchema = z
13+
.strictObject({
14+
api_key: z.string(),
15+
root: z.string(),
16+
paths: z.string(),
17+
runtime: z.enum(["node", "python"]),
18+
package_manager: z
19+
.enum(["", ...nodeManagers, ...pythonManagers])
20+
.describe("The preferred package manager for the runtime selected")
21+
.default(""),
22+
use_proxy: z
23+
.string()
24+
.toLowerCase()
25+
.transform(x => JSON.parse(x))
26+
.pipe(z.boolean()),
27+
terminate_on_failure: z
28+
.string()
29+
.toLowerCase()
30+
.transform(x => JSON.parse(x))
31+
.pipe(z.boolean())
32+
.default("false"),
33+
})
34+
.refine(
35+
data => {
36+
if (data.package_manager === "") {
37+
return true;
38+
}
39+
if (data.runtime === "node") {
40+
return nodeManagers.includes(data.package_manager as any);
41+
}
42+
if (data.runtime === "python") {
43+
return pythonManagers.includes(data.package_manager as any);
44+
}
45+
return false;
46+
},
47+
{
48+
message: "Package manager must match the selected runtime",
49+
path: ["package_manager"], // This will show the error on the package_manager field
50+
},
51+
);
2652
export type Params = z.infer<typeof paramsSchema>;
2753

2854
const TITLE = "## Braintrust eval report\n";
@@ -37,6 +63,7 @@ async function main(): Promise<void> {
3763
root: core.getInput("root"),
3864
paths: core.getInput("paths"),
3965
runtime: core.getInput("runtime"),
66+
package_manager: core.getInput("package_manager"),
4067
use_proxy: core.getInput("use_proxy"),
4168
terminate_on_failure: core.getInput("terminate_on_failure"),
4269
});
File renamed without changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,5 @@ jobs:
4141
with:
4242
api_key: ${{ secrets.BRAINTRUST_API_KEY }}
4343
runtime: node
44+
package_manager: pnpm
4445
root: my_eval_dir

0 commit comments

Comments
 (0)