add package manager option (#75)

ibolmo · daviddkkim · web-flow · commit a0d327aa4c48 · 2025-07-07T19:57:24.000-05:00
Co-authored-by: Olmo Maldonado <olmo@braintrust.dev>
Co-authored-by: david kim <david.kim@richmond.edu>
diff --git a/.github/workflows/eval-py-uv.yml b/.github/workflows/eval-py-uv.yml
@@ -0,0 +1,50 @@
+name: Run Python evals
+
+on:
+  push:
+    # paths:
+    #   - 'test-eval-py/**'
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  eval:
+    name: Run Python evals
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: "recursive"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12" # TODO: Matrix test different versions
+
+      - name: Install dependencies
+        run: |
+          cd test-eval-py
+          uv lock --check
+          uv sync --no-dev
+
+      - name: Run Evals
+        uses: ./ # the action defined at the root of this checkout
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          root: test-eval-py
+          runtime: python
+          package_manager: uv
+
+      # - name: Start terminal session
+      #   uses: mxschmitt/action-tmate@v3
+      #   with:
+      #     limit-access-to-actor: true
diff --git a/README.md b/README.md
@@ -22,11 +22,13 @@ You can configure the following variables:
 - `paths`: Specific paths, relative to the root, containing evals you'd like to
   run.
 - `runtime`: Either `node` or `python`
+- `package_manager`: Either `npm` or `pnpm` for a `node` runtime, or `pip` or
+  `uv` for a `python` runtime. If unset, `npm` or `pip` is assumed.
 - `use_proxy`: Either `true` or `false`. If set, `OPENAI_BASE_URL` will be set
   to `https://braintrustproxy.com/v1`, which will automatically cache repetitive
   LLM calls and run your evals faster. Defaults to `true`.
-- `terminate_on_failure`: Either `true` or `false`. If set to `true`, the evaluation
-  process will stop when an error occurs. Defaults to `false`.
+- `terminate_on_failure`: Either `true` or `false`. If set to `true`, the
+  evaluation process will stop when an error occurs. Defaults to `false`.
 
 ## Full example
 
@@ -82,9 +84,10 @@ jobs:
 
 To see examples of fully configured templates, see the `examples` directory:
 
-- [`node with npm`](examples/npm.yml)
-- [`node with pnpm`](examples/pnpm.yml)
-- [`python`](examples/python.yml)
+- [`node with npm`](examples/node/npm.yml)
+- [`node with pnpm`](examples/node/pnpm.yml)
+- [`python with pip`](examples/python/pip.yml)
+- [`python with uv`](examples/python/uv.yml)
 
 ## How it works
 
diff --git a/action.yml b/action.yml
@@ -23,6 +23,12 @@ inputs:
   runtime:
     description: "The runtime to use for evals. Valid values: node, python."
     required: true
+  package_manager:
+    description:
+      "The package manager to use for evals. Valid values: npm or pnpm for the
+      node runtime; pip or uv for the python runtime."
+    required: false
+    default: ""
   use_proxy:
     description:
       "Whether to use the Braintrust proxy (to cache LLM calls). Set to 'true'
@@ -31,8 +37,8 @@ inputs:
     default: "true"
   terminate_on_failure:
     description:
-      "Whether to terminate the evaluation process when an error occurs. Set to 'true'
-      or 'false'."
+      "Whether to terminate the evaluation process when an error occurs. Set to
+      'true' or 'false'."
     required: false
     default: "false"
   github_token:
diff --git a/eval/dist/index.js b/eval/dist/index.js
diff --git a/eval/dist/index.js.map b/eval/dist/index.js.map
diff --git a/eval/src/braintrust.ts b/eval/src/braintrust.ts
@@ -17,6 +17,7 @@ function snakeToCamelCase(str: string) {
 }
 
 async function runCommand(command: string, onSummary: OnSummaryFn) {
+  core.info(`> $ ${command}`);
   return new Promise((resolve, reject) => {
     const process = execSync(command);
 
@@ -76,18 +77,40 @@ export async function runEval(args: Params, onSummary: OnSummaryFn) {
   // Change working directory
   process.chdir(path.resolve(root));
 
-  let command: string;
   const terminateFlag = terminate_on_failure ? "--terminate-on-failure" : "";
 
-  switch (args.runtime) {
-    case "node":
-      command = `npx braintrust eval --jsonl ${terminateFlag} ${paths}`;
-      break;
-    case "python":
-      command = `braintrust eval --jsonl ${terminateFlag} ${paths}`;
-      break;
-    default:
-      throw new Error(`Unsupported runtime: ${args.runtime}`);
-  }
+  // Normalized once so the node and python branches compare the same value.
+  const packageManager = (args.package_manager || "").toLowerCase().trim();
+  const unsupportedManager = () =>
+    new Error(`Unsupported package manager: ${args.package_manager}`);
+  const baseCommand = (() => {
+    switch (args.runtime.toLowerCase().trim()) {
+      case "node":
+        switch (packageManager) {
+          case "":
+          case "npm":
+            return "npx braintrust";
+          case "pnpm":
+            return "pnpm dlx braintrust";
+          default:
+            throw unsupportedManager();
+        }
+      case "python":
+        switch (packageManager) {
+          case "":
+          case "pip":
+            return "braintrust";
+          case "uv":
+            return "uv run braintrust";
+          default:
+            throw unsupportedManager();
+        }
+      default:
+        throw new Error(`Unsupported runtime: ${args.runtime}`);
+    }
+  })();
+
+  const command = `${baseCommand} eval --jsonl ${terminateFlag} ${paths}`;
+
   await runCommand(command, onSummary);
 }
diff --git a/eval/src/main.ts b/eval/src/main.ts
@@ -6,23 +6,49 @@ import { ExperimentSummary } from "braintrust";
 import { capitalize } from "@braintrust/core";
 import { z } from "zod";
 
-const paramsSchema = z.strictObject({
-  api_key: z.string(),
-  root: z.string(),
-  paths: z.string(),
-  runtime: z.enum(["node", "python"]),
-  use_proxy: z
-    .string()
-    .toLowerCase()
-    .transform(x => JSON.parse(x))
-    .pipe(z.boolean()),
-  terminate_on_failure: z
-    .string()
-    .toLowerCase()
-    .transform(x => JSON.parse(x))
-    .pipe(z.boolean())
-    .default("false"),
-});
+/** Package managers accepted per runtime; "" (unset) is always accepted. */
+const nodeManagers = ["npm", "pnpm"];
+const pythonManagers = ["pip", "uv"];
+const managersByRuntime: Record<"node" | "python", readonly string[]> = {
+  node: nodeManagers,
+  python: pythonManagers,
+};
+
+/**
+ * Schema for the action inputs. Boolean inputs arrive as "true"/"false"
+ * strings; `package_manager` must be empty or belong to the chosen `runtime`.
+ */
+const paramsSchema = z
+  .strictObject({
+    api_key: z.string(),
+    root: z.string(),
+    paths: z.string(),
+    runtime: z.enum(["node", "python"]),
+    package_manager: z
+      .enum(["", ...nodeManagers, ...pythonManagers])
+      .describe("The preferred package manager for the runtime selected")
+      .default(""),
+    use_proxy: z
+      .string()
+      .toLowerCase()
+      .transform(x => JSON.parse(x))
+      .pipe(z.boolean()),
+    terminate_on_failure: z
+      .string()
+      .toLowerCase()
+      .transform(x => JSON.parse(x))
+      .pipe(z.boolean())
+      .default("false"),
+  })
+  .refine(
+    data =>
+      data.package_manager === "" ||
+      managersByRuntime[data.runtime].includes(data.package_manager),
+    {
+      message: "Package manager must match the selected runtime",
+      path: ["package_manager"], // Report the error on the package_manager field
+    },
+  );
 export type Params = z.infer<typeof paramsSchema>;
 
 const TITLE = "## Braintrust eval report\n";
@@ -37,6 +63,7 @@ async function main(): Promise<void> {
     root: core.getInput("root"),
     paths: core.getInput("paths"),
     runtime: core.getInput("runtime"),
+    package_manager: core.getInput("package_manager"),
     use_proxy: core.getInput("use_proxy"),
     terminate_on_failure: core.getInput("terminate_on_failure"),
   });
diff --git a/examples/node/npm.yml b/examples/node/npm.yml
diff --git a/examples/node/pnpm.yml b/examples/node/pnpm.yml
@@ -41,4 +41,5 @@ jobs:
         with:
           api_key: ${{ secrets.BRAINTRUST_API_KEY }}
           runtime: node
+          package_manager: pnpm
           root: my_eval_dir
diff --git a/examples/python/pip.yml b/examples/python/pip.yml
diff --git a/examples/python/uv.yml b/examples/python/uv.yml
@@ -0,0 +1,45 @@
+name: Run Python evals
+
+on:
+  push:
+    # Uncomment to run only when files in the 'evals' directory change
+    # paths:
+    #   - "evals/**"
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  eval:
+    name: Run evals
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12" # Replace with your Python version
+
+      # Sync the uv project in the eval root; the action runs `uv run braintrust`
+      - name: Install dependencies
+        run: |
+          cd my_eval_dir
+          uv sync
+
+      - name: Run Evals
+        uses: braintrustdata/eval-action@v1
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          runtime: python
+          package_manager: uv
+          root: my_eval_dir
diff --git a/mise.toml b/mise.toml
@@ -0,0 +1,4 @@
+[tools]
+node = "20.6.0"
+pnpm = "8"
+python = "latest"
diff --git a/script/release b/script/release
@@ -25,7 +25,7 @@ GREEN='\033[0;32m'
 BLUE='\033[0;34m'
 
 # Get the latest release tag
-latest_tag=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+latest_tag=$(git tag -l 'v*' --sort=-v:refname | head -n 1)
 
 if [[ -z "$latest_tag" ]]; then
 	# There are no existing release tags
@@ -59,6 +59,6 @@ git tag -a "$tag_first_part" -m "$tag_first_part Release" -f
 echo -e "${GREEN}Tagged: $tag_first_part${OFF}"
 
 # Push the new tag to the remote
-git push --tags -f
+git push --tags
 echo -e "${GREEN}Release tag pushed to remote${OFF}"
 echo -e "${GREEN}Done!${OFF}"