From 1e8e17c214fe53d508b374c5f38c4f72d0b9185b Mon Sep 17 00:00:00 2001
From: Nate Selvidge <nate.selvidge@braintrustdata.com>
Date: Fri, 13 Mar 2026 20:12:25 +0000
Subject: [PATCH 1/5] add support for pushing sandboxes

---
 scripts/functions-runner.py                   | 121 ++++++++++-
 scripts/functions-runner.ts                   | 109 ++++++++++
 src/functions/mod.rs                          |   3 +
 src/functions/push.rs                         |  51 +++++
 .../list-sandbox-type-parses/fixture.json     |   5 +
 tests/functions.rs                            | 196 ++++++++++++++++++
 6 files changed, 478 insertions(+), 7 deletions(-)
 create mode 100644 tests/functions-fixtures/list-sandbox-type-parses/fixture.json

diff --git a/scripts/functions-runner.py b/scripts/functions-runner.py
index 1f16f06..aa7bd05 100644
--- a/scripts/functions-runner.py
+++ b/scripts/functions-runner.py
@@ -3,6 +3,7 @@
 import inspect
 import json
 import os
+import re
 import sys
 from contextlib import nullcontext
 from typing import Any
@@ -42,7 +43,7 @@ def to_json_value(value: Any) -> Any:
     return str(value)
 
 
-def load_framework_globals() -> tuple[Any, Any, Any]:
+def load_framework_globals() -> tuple[Any, Any, Any, Any]:
     # Prefer current SDK layout first:
     # - braintrust.framework2 exposes module-level `global_`
     # - braintrust.framework exposes `_set_lazy_load`
@@ -50,13 +51,23 @@ def load_framework_globals() -> tuple[Any, Any, Any]:
         from braintrust.framework import _set_lazy_load as lazy
         from braintrust.framework2 import global_ as global_state
 
-        return global_state.functions, global_state.prompts, lazy
+        try:
+            from braintrust.framework import _evals
+        except (ImportError, ModuleNotFoundError):
+            _evals = None
+
+        return global_state.functions, global_state.prompts, lazy, _evals
     except (ImportError, ModuleNotFoundError):
         # Backward compatibility with older SDK layout.
         from braintrust.framework2.global_ import functions, prompts
         from braintrust.framework2.lazy_load import _set_lazy_load as lazy
 
-        return functions, prompts, lazy
+        try:
+            from braintrust.framework import _evals
+        except (ImportError, ModuleNotFoundError):
+            _evals = None
+
+        return functions, prompts, lazy, _evals
 
 
 def normalize_project_selector(project: Any) -> tuple[str | None, str | None]:
@@ -277,16 +288,105 @@ async def collect_function_event_entries(prompts_registry: Any) -> list[dict[str
     return entries
 
 
+def slugify(text: str) -> str:  # lower-case; non-alphanumeric runs -> "-"; no edge dashes
+    return re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
+
+
+def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dict[str, Any]]:
+    """Return one sandbox manifest entry per registered evaluator ([] without a registry)."""
+    registered = getattr(evals_registry, "evaluators", None)
+    if not registered or not isinstance(registered, dict):
+        return []
+
+    # Base name of the source file minus its extension and a trailing ".eval".
+    stem = re.sub(r"\.eval$", "", os.path.splitext(os.path.basename(source_file))[0])
+
+    entries: list[dict[str, Any]] = []
+    for eval_name, instance in registered.items():
+        # A None instance has no `evaluator` attribute either, so one check covers both.
+        evaluator = getattr(instance, "evaluator", None)
+        if evaluator is None:
+            continue
+
+        declared = getattr(evaluator, "project_name", None)
+        selector = {"project_name": declared} if isinstance(declared, str) else None
+        project_id, proj_name = normalize_project_selector(selector)
+
+        scorers = getattr(evaluator, "scores", []) or []
+        evaluator_definition: dict[str, Any] = {
+            "scores": [
+                {"name": getattr(scorer, "__name__", f"scorer_{i}")}
+                for i, scorer in enumerate(scorers)
+            ]
+        }
+
+        raw_params = getattr(evaluator, "parameters", None)
+        is_marked = getattr(raw_params, "__braintrust_parameters_marker", None) is True
+        if is_marked:
+            # Marked objects become a "braintrust.parameters" record: schema plus source ids.
+            source_attrs = {
+                "parametersId": "id",
+                "slug": "slug",
+                "name": "name",
+                "projectId": "projectId",
+                "version": "version",
+            }
+            evaluator_definition["parameters"] = {
+                "type": "braintrust.parameters",
+                "schema": getattr(raw_params, "schema", None),
+                "source": {
+                    key: getattr(raw_params, attr, None) for key, attr in source_attrs.items()
+                },
+            }
+        elif raw_params is not None:
+            # Any other parameters value is serialized inline; a None result is omitted.
+            serialized = to_json_value(raw_params)
+            if serialized is not None:
+                evaluator_definition["parameters"] = serialized
+
+        base_entry: dict[str, Any] = {"kind": "code"}
+        if project_id:
+            base_entry["project_id"] = project_id
+        if proj_name:
+            base_entry["project_name"] = proj_name
+
+        # Sandbox entry only — task and scorer entries are pushed separately
+        # when the eval is actually run, matching the Python SDK behavior.
+        sandbox_entry = {
+            **base_entry,
+            "name": f"Eval {eval_name} sandbox",
+            "slug": slugify(f"{stem}-{eval_name}-sandbox"),
+            "function_type": "sandbox",
+            "location": {
+                "type": "sandbox",
+                "sandbox_spec": {"provider": "lambda"},
+                "entrypoints": [source_file],
+                "eval_name": eval_name,
+                "evaluator_definition": evaluator_definition,
+            },
+            "metadata": {"_bt_sandbox_group_name": stem},
+        }
+        entries.append(sandbox_entry)
+
+    return entries
+
+
 async def process_file(file_path: str) -> dict[str, Any]:
     abs_path = os.path.abspath(file_path)
     cwd = os.getcwd()
     if cwd not in sys.path:
         sys.path.insert(0, cwd)
 
-    purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})
-    functions_registry, prompts_registry, lazy_loader = load_framework_globals()
+    functions_registry, prompts_registry, lazy_loader, evals_registry = load_framework_globals()
     clear_registry(functions_registry)
     clear_registry(prompts_registry)
+    if (
+        evals_registry is not None
+        and hasattr(evals_registry, "evaluators")
+        and isinstance(evals_registry.evaluators, dict)
+    ):
+        evals_registry.evaluators.clear()
+    purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})
 
     module_name = import_module_name_from_cwd(cwd, abs_path)
     if module_name is None:
@@ -298,12 +398,13 @@ async def process_file(file_path: str) -> dict[str, Any]:
         import_file(module_name, abs_path, extra_paths)
         code_entries = collect_code_entries(functions_registry)
         event_entries = await collect_function_event_entries(prompts_registry)
-        entries = [*code_entries, *event_entries]
+        evaluator_entries = collect_evaluator_entries(evals_registry, abs_path)
+        entries = [*code_entries, *event_entries, *evaluator_entries]
         file_manifest: dict[str, Any] = {
             "source_file": abs_path,
             "entries": entries,
         }
-        if code_entries:
+        if code_entries or evaluator_entries:
             runner_root = os.path.dirname(os.path.abspath(__file__))
             project_root = os.path.abspath(cwd)
             path_rest: list[str] = []
@@ -357,6 +458,12 @@ async def process_file(file_path: str) -> dict[str, Any]:
 
     clear_registry(functions_registry)
     clear_registry(prompts_registry)
+    if (
+        evals_registry is not None
+        and hasattr(evals_registry, "evaluators")
+        and isinstance(evals_registry.evaluators, dict)
+    ):
+        evals_registry.evaluators.clear()
     return file_manifest
 
 
diff --git a/scripts/functions-runner.ts b/scripts/functions-runner.ts
index 20fc308..1a5b9e8 100644
--- a/scripts/functions-runner.ts
+++ b/scripts/functions-runner.ts
@@ -88,6 +88,20 @@ type Manifest = {
   files: ManifestFile[];
 };
 
+function slugify(input: string): string {
+  // Lower-case, collapse each non-alphanumeric run into "-", then trim edge dashes.
+  const dashed = input.toLowerCase().replace(/[^a-z0-9]+/g, "-");
+  const trimmed = dashed.replace(/^-/, "").replace(/-$/, "");
+  return trimmed;
+}
+
+function extractScoreName(score: unknown, idx: number): string {
+  // Named functions keep their own name; anything else gets a positional fallback.
+  const ownName =
+    typeof score === "function" && typeof score.name === "string" ? score.name : "";
+  return ownName === "" ? `scorer_${idx}` : ownName;
+}
+
 type EvalRegistry = NonNullable<typeof globalThis._evals>;
 type ZodToJsonSchemaFn = (schema: unknown) => unknown;
 
@@ -473,6 +487,97 @@ function collectCodeEntries(items: CodeRegistryItem[]): CodeEntry[] {
   return entries;
 }
 
+/**
+ * Builds one sandbox manifest entry per evaluator registered by `sourceFilePath`.
+ * Registry values that are not objects with an object `evaluator` are skipped;
+ * each slug is prefixed with the file's base name (extension and `.eval` removed).
+ */
+function collectEvaluatorEntries(
+  evaluators: Record<string, unknown>,
+  sourceFilePath: string,
+): CodeEntry[] {
+  const asString = (value: unknown): string | undefined =>
+    typeof value === "string" ? value : undefined;
+  const entries: CodeEntry[] = [];
+  const ext = path.extname(sourceFilePath);
+  const stem = path.basename(sourceFilePath, ext).replace(/\.eval$/, "");
+
+  for (const [evalName, entry] of Object.entries(evaluators)) {
+    if (!entry || typeof entry !== "object") {
+      continue;
+    }
+
+    const evaluator = (entry as Record<string, unknown>).evaluator;
+    if (!evaluator || typeof evaluator !== "object") {
+      continue;
+    }
+
+    const evalObj = evaluator as Record<string, unknown>;
+    // NOTE(review): the JS SDK is believed to register evaluators with a
+    // camelCase `projectName` (confirm); snake_case stays as the fallback.
+    const projectName =
+      asString(evalObj.projectName) ?? asString(evalObj.project_name);
+    const scores = Array.isArray(evalObj.scores) ? evalObj.scores : [];
+
+    const selector = asProjectSelector(
+      projectName !== undefined ? { name: projectName } : undefined,
+    );
+
+    const evaluatorDefinition: JsonObject = {
+      scores: scores.map((s: unknown, i: number) => ({
+        name: extractScoreName(s, i),
+      })) as JsonValue,
+    };
+
+    const rawParams = evalObj.parameters;
+    if (rawParams !== undefined && rawParams !== null) {
+      const marker =
+        typeof rawParams === "object" &&
+        (rawParams as Record<string, unknown>).__braintrust_parameters_marker === true;
+      if (marker) {
+        const paramObj = rawParams as Record<string, unknown>;
+        evaluatorDefinition.parameters = toJsonValue({
+          type: "braintrust.parameters",
+          schema: paramObj.schema,
+          source: {
+            parametersId: paramObj.id,
+            slug: paramObj.slug,
+            name: paramObj.name,
+            projectId: paramObj.projectId,
+            version: paramObj.version,
+          },
+        } as JsonValue);
+      } else {
+        const serialized = toJsonValue(rawParams as JsonValue);
+        if (serialized !== undefined) {
+          evaluatorDefinition.parameters = serialized;
+        }
+      }
+    }
+
+    // Sandbox entry only — task and scorer entries are pushed separately
+    // when the eval is actually run, matching the Python SDK behavior.
+    entries.push({
+      kind: "code",
+      project_id: asString(selector.project_id),
+      project_name: asString(selector.project_name),
+      name: `Eval ${evalName} sandbox`,
+      slug: slugify(`${stem}-${evalName}-sandbox`),
+      function_type: "sandbox",
+      location: {
+        type: "sandbox",
+        sandbox_spec: { provider: "lambda" },
+        entrypoints: [sourceFilePath],
+        eval_name: evalName,
+        evaluator_definition: evaluatorDefinition as JsonValue,
+      } as JsonValue,
+      metadata: { _bt_sandbox_group_name: stem },
+    });
+  }
+
+  return entries;
+}
+
 async function processFile(filePath: string): Promise<ManifestFile> {
   const absolutePath = path.resolve(process.cwd(), filePath);
   const fallbackRegistry = freshRegistry();
@@ -492,6 +597,10 @@ async function processFile(filePath: string): Promise<ManifestFile> {
       registry.parameters as EventRegistryItem[],
       false,
     )),
+    ...collectEvaluatorEntries(
+      registry.evaluators as Record<string, unknown>,
+      absolutePath,
+    ),
   ];
 
   return {
diff --git a/src/functions/mod.rs b/src/functions/mod.rs
index 9974284..eadc100 100644
--- a/src/functions/mod.rs
+++ b/src/functions/mod.rs
@@ -35,6 +35,7 @@ pub enum FunctionTypeFilter {
     Classifier,
     Tag,
     Parameters,
+    Sandbox,
 }
 
 impl FunctionTypeFilter {
@@ -50,6 +51,7 @@ impl FunctionTypeFilter {
             Self::Classifier => "classifier",
             Self::Tag => "tag",
             Self::Parameters => "parameters",
+            Self::Sandbox => "sandbox",
         }
     }
 
@@ -73,6 +75,7 @@ impl FunctionTypeFilter {
             Self::Classifier => "classifiers",
             Self::Tag => "tags",
             Self::Parameters => "parameters",
+            Self::Sandbox => "sandboxes",
         }
     }
 }
diff --git a/src/functions/push.rs b/src/functions/push.rs
index 3803c25..a83e58f 100644
--- a/src/functions/push.rs
+++ b/src/functions/push.rs
@@ -3263,6 +3263,57 @@ mod tests {
         );
     }
 
+    /// `build_code_function_data` must embed a sandbox location in the bundle
+    /// payload unchanged and must not add a `preview` when none is supplied.
+    #[test]
+    fn code_function_data_passes_through_sandbox_location() {
+        let location = serde_json::json!({
+            "type": "sandbox",
+            "sandbox_spec": { "provider": "lambda" },
+            "entrypoints": ["/tmp/eval.ts"],
+            "eval_name": "my-eval",
+            "evaluator_definition": {
+                "scores": [{ "name": "accuracy" }]
+            }
+        });
+        let node_runtime = RuntimeContext {
+            runtime: String::from("node"),
+            version: String::from("20.0.0"),
+        };
+
+        let function_data =
+            build_code_function_data(&node_runtime, location.clone(), "bundle-sandbox-1", None);
+        let data = &function_data["data"];
+
+        assert_eq!(function_data["type"], "code");
+        assert_eq!(data["type"], "bundle");
+        assert_eq!(data["bundle_id"], "bundle-sandbox-1");
+        assert_eq!(data["location"], location);
+        assert!(data.get("preview").is_none());
+    }
+
+    /// `build_code_function_data` must copy an `experiment` location into
+    /// `data.location` without modification.
+    #[test]
+    fn code_function_data_passes_through_experiment_location() {
+        let location = serde_json::json!({
+            "type": "experiment",
+            "eval_name": "my-eval",
+            "position": { "type": "task" }
+        });
+        let node_runtime = RuntimeContext {
+            runtime: String::from("node"),
+            version: String::from("20.0.0"),
+        };
+
+        let function_data =
+            build_code_function_data(&node_runtime, location.clone(), "bundle-task-1", None);
+
+        // Only the wrapper type and the embedded location are pinned here.
+        assert_eq!(function_data["type"], "code");
+        assert_eq!(function_data["data"]["location"], location);
+    }
+
     fn test_base_args() -> BaseArgs {
         BaseArgs {
             json: false,
diff --git a/tests/functions-fixtures/list-sandbox-type-parses/fixture.json b/tests/functions-fixtures/list-sandbox-type-parses/fixture.json
new file mode 100644
index 0000000..642a921
--- /dev/null
+++ b/tests/functions-fixtures/list-sandbox-type-parses/fixture.json
@@ -0,0 +1,5 @@
+{
+  "command": ["functions", "list", "--type", "sandbox"],
+  "expect_success": false,
+  "stderr_not_contains": ["invalid value 'sandbox'"]
+}
diff --git a/tests/functions.rs b/tests/functions.rs
index 5f18f8e..c9e6a17 100644
--- a/tests/functions.rs
+++ b/tests/functions.rs
@@ -1909,6 +1909,202 @@ exit 24
     );
 }
 
+/// Pushes a mock-runner sandbox entry and verifies the function recorded by the mock API.
+#[cfg(unix)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn functions_push_sandbox_entries_reach_api() {
+    if !command_exists("node") {
+        eprintln!("Skipping functions_push_sandbox_entries_reach_api (node not installed).");
+        return;
+    }
+
+    let state = Arc::new(MockServerState::default());
+    state
+        .projects
+        .lock()
+        .expect("projects lock")
+        .push(MockProject {
+            id: "proj_mock".to_string(),
+            name: "mock-project".to_string(),
+            org_id: "org_mock".to_string(),
+        });
+    let server = MockServer::start(state.clone()).await;
+
+    let tmp = tempdir().expect("tempdir");
+    let source = tmp.path().join("my-eval.js");
+    std::fs::write(
+        &source,
+        "globalThis._evals ??= { functions: [], prompts: [], parameters: [], evaluators: {}, reporters: {} };\n",
+    )
+    .expect("write source file");
+
+    let runner = tmp.path().join("mock-runner.sh");
+    std::fs::write(
+        &runner,
+        r#"#!/bin/sh
+set -eu
+_runner_script="$1"
+shift
+_runner_name="$(basename "$_runner_script")"
+
+if [ "$_runner_name" = "functions-runner.ts" ]; then
+node - "$@" <<'NODE'
+const path = require("node:path");
+const files = process.argv.slice(2);
+const manifest = {
+  runtime_context: { runtime: "node", version: process.versions.node || "unknown" },
+  files: files.map((file) => {
+    const abs = path.resolve(file);
+    return {
+      source_file: abs,
+      entries: [
+        {
+          kind: "code",
+          project_name: "mock-project",
+          name: "Eval my-eval sandbox",
+          slug: "my-eval-my-eval-sandbox",
+          function_type: "sandbox",
+          location: {
+            type: "sandbox",
+            sandbox_spec: { provider: "lambda" },
+            entrypoints: [abs],
+            eval_name: "my-eval",
+            evaluator_definition: { scores: [{ name: "accuracy" }] }
+          },
+          metadata: { _bt_sandbox_group_name: "my-eval" }
+        }
+      ]
+    };
+  })
+};
+process.stdout.write(JSON.stringify(manifest));
+NODE
+exit 0
+fi
+
+if [ "$_runner_name" = "functions-bundler.ts" ]; then
+  _source_file="$1"
+  _output_file="$2"
+  cp "$_source_file" "$_output_file"
+  exit 0
+fi
+
+echo "unexpected runner script: $_runner_name" >&2
+exit 24
+"#,
+    )
+    .expect("write mock runner");
+    use std::os::unix::fs::PermissionsExt;
+    let mut perms = std::fs::metadata(&runner)
+        .expect("runner metadata")
+        .permissions();
+    perms.set_mode(0o755);
+    std::fs::set_permissions(&runner, perms).expect("runner permissions");
+
+    let output = Command::new(bt_binary_path())
+        .current_dir(tmp.path())
+        .args([
+            "functions",
+            "--json",
+            "push",
+            "--file",
+            source
+                .to_str()
+                .expect("source path should be valid UTF-8 for test"),
+            "--language",
+            "javascript",
+            "--runner",
+            runner
+                .to_str()
+                .expect("runner path should be valid UTF-8 for test"),
+            "--if-exists",
+            "replace",
+        ])
+        .env("BRAINTRUST_API_KEY", "test-key")
+        .env("BRAINTRUST_ORG_NAME", "test-org")
+        .env("BRAINTRUST_API_URL", &server.base_url)
+        .env("BRAINTRUST_APP_URL", &server.base_url)
+        .env("BRAINTRUST_NO_COLOR", "1")
+        .env_remove("BRAINTRUST_PROFILE")
+        .output()
+        .expect("run bt functions push");
+
+    server.stop().await;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        panic!("mock push failed:\n{stderr}");
+    }
+
+    let summary: Value = serde_json::from_slice(&output.stdout).expect("parse push summary");
+    assert_eq!(summary["status"].as_str(), Some("success"));
+    assert_eq!(summary["uploaded_files"].as_u64(), Some(1));
+    assert_eq!(summary["failed_files"].as_u64(), Some(0));
+
+    let inserted = state
+        .inserted_functions
+        .lock()
+        .expect("inserted functions lock")
+        .clone();
+    assert_eq!(
+        inserted.len(),
+        1,
+        "expected 1 inserted function (sandbox only)"
+    );
+
+    let sandbox_obj = inserted[0].as_object().expect("sandbox should be an object");
+    assert_eq!(
+        sandbox_obj.get("slug").and_then(Value::as_str),
+        Some("my-eval-my-eval-sandbox")
+    );
+    assert_eq!(
+        sandbox_obj.get("function_type").and_then(Value::as_str),
+        Some("sandbox")
+    );
+
+    // Verify function_data.data.location is sandbox type
+    let function_data = sandbox_obj
+        .get("function_data")
+        .and_then(Value::as_object)
+        .expect("function_data object");
+    assert_eq!(
+        function_data.get("type").and_then(Value::as_str),
+        Some("code")
+    );
+    let data = function_data
+        .get("data")
+        .and_then(Value::as_object)
+        .expect("function_data.data object");
+    let location = data
+        .get("location")
+        .and_then(Value::as_object)
+        .expect("location object");
+    assert_eq!(
+        location.get("type").and_then(Value::as_str),
+        Some("sandbox")
+    );
+    let sandbox_spec = location
+        .get("sandbox_spec")
+        .and_then(Value::as_object)
+        .expect("sandbox_spec object");
+    assert_eq!(
+        sandbox_spec.get("provider").and_then(Value::as_str),
+        Some("lambda")
+    );
+
+    // Verify metadata
+    let metadata = sandbox_obj
+        .get("metadata")
+        .and_then(Value::as_object)
+        .expect("metadata object");
+    assert_eq!(
+        metadata
+            .get("_bt_sandbox_group_name")
+            .and_then(Value::as_str),
+        Some("my-eval")
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn functions_pull_works_against_mock_api() {
     let state = Arc::new(MockServerState::default());

From 9a70a3d9357f8093a24c9528c48b220d317050af Mon Sep 17 00:00:00 2001
From: Nate Selvidge <nate.selvidge@braintrustdata.com>
Date: Fri, 13 Mar 2026 21:18:35 +0000
Subject: [PATCH 2/5] bundle sandbox pushes self-contained; use CWD-relative entrypoints

---
 scripts/functions-bundler.ts |  5 ++++-
 scripts/functions-runner.py  |  2 +-
 scripts/functions-runner.ts  |  2 +-
 src/functions/push.rs        | 10 +++++++++-
 tests/functions.rs           | 16 ++++++++++++++++
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/scripts/functions-bundler.ts b/scripts/functions-bundler.ts
index 02b9d11..de0af0b 100644
--- a/scripts/functions-bundler.ts
+++ b/scripts/functions-bundler.ts
@@ -275,7 +275,10 @@ async function main(): Promise<void> {
   const externalPackages = parseExternalPackages(
     process.env.BT_FUNCTIONS_PUSH_EXTERNAL_PACKAGES,
   );
-  const external = buildExternalPackagePatterns(externalPackages);
+  const selfContained = process.env.BT_FUNCTIONS_PUSH_SELF_CONTAINED === "1";
+  const external = selfContained
+    ? ["fsevents", "chokidar"]
+    : buildExternalPackagePatterns(externalPackages);
   const tsconfig = loadTsconfigPath();
 
   const outputDir = path.dirname(outputFile);
diff --git a/scripts/functions-runner.py b/scripts/functions-runner.py
index aa7bd05..a140a19 100644
--- a/scripts/functions-runner.py
+++ b/scripts/functions-runner.py
@@ -360,7 +360,7 @@ def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dic
             "location": {
                 "type": "sandbox",
                 "sandbox_spec": {"provider": "lambda"},
-                "entrypoints": [source_file],
+                "entrypoints": [os.path.relpath(source_file)],
                 "eval_name": eval_name,
                 "evaluator_definition": evaluator_definition,
             },
diff --git a/scripts/functions-runner.ts b/scripts/functions-runner.ts
index 1a5b9e8..cd49251 100644
--- a/scripts/functions-runner.ts
+++ b/scripts/functions-runner.ts
@@ -567,7 +567,7 @@ function collectEvaluatorEntries(
       location: {
         type: "sandbox",
         sandbox_spec: { provider: "lambda" },
-        entrypoints: [sourceFilePath],
+        entrypoints: [path.relative(process.cwd(), sourceFilePath)],
         eval_name: evalName,
         evaluator_definition: evaluatorDefinition as JsonValue,
       } as JsonValue,
diff --git a/src/functions/push.rs b/src/functions/push.rs
index a83e58f..5d4de74 100644
--- a/src/functions/push.rs
+++ b/src/functions/push.rs
@@ -716,10 +716,14 @@ async fn push_file(
 
     let mut function_events: Vec<Value> = Vec::new();
 
+    let has_sandbox_entries = code_entries
+        .iter()
+        .any(|(code, _)| code.function_type.as_deref() == Some("sandbox"));
+
     if !code_entries.is_empty() {
         let (upload_bytes, content_encoding) = match selected_language {
             SourceLanguage::JsLike => {
-                let bundle_bytes = build_js_bundle(source_path, args)?;
+                let bundle_bytes = build_js_bundle(source_path, args, has_sandbox_entries)?;
                 let gzipped = gzip_bytes(&bundle_bytes).map_err(|err| FileFailure {
                     reason: HardFailureReason::BundleUploadFailed,
                     message: format!("failed to gzip {}: {err}", source_path.display()),
@@ -922,6 +926,7 @@ async fn push_file(
 fn build_js_bundle(
     source_path: &Path,
     args: &PushArgs,
+    self_contained: bool,
 ) -> std::result::Result<Vec<u8>, FileFailure> {
     let build_dir = TempBuildDir::create("bt-functions-js-bundle").map_err(|err| FileFailure {
         reason: HardFailureReason::BundleUploadFailed,
@@ -954,6 +959,9 @@ fn build_js_bundle(
             args.external_packages.join(","),
         );
     }
+    if self_contained {
+        command.env("BT_FUNCTIONS_PUSH_SELF_CONTAINED", "1");
+    }
 
     let output = command.output().map_err(|err| FileFailure {
         reason: HardFailureReason::RunnerSpawnFailed,
diff --git a/tests/functions.rs b/tests/functions.rs
index c9e6a17..fa3de7c 100644
--- a/tests/functions.rs
+++ b/tests/functions.rs
@@ -1091,6 +1091,22 @@ fn functions_python_runner_emits_valid_manifest_with_bundle() {
         "from contextlib import nullcontext\n\ndef _set_lazy_load(_enabled):\n    return nullcontext()\n",
     )
     .expect("write lazy_load.py");
+    std::fs::write(
+        stub_root.join("braintrust").join("framework.py"),
+        concat!(
+            "from contextlib import nullcontext\n",
+            "\n",
+            "def _set_lazy_load(_enabled):\n",
+            "    return nullcontext()\n",
+            "\n",
+            "class _EvalFile:\n",
+            "    def __init__(self):\n",
+            "        self.evaluators = {}\n",
+            "\n",
+            "_evals = _EvalFile()\n",
+        ),
+    )
+    .expect("write framework.py");
 
     let sample_path = tmp.path().join("sample_tool.py");
     std::fs::write(

From 0aafaf19c8e978fdd4578fcce5615f484f0806f8 Mon Sep 17 00:00:00 2001
From: Nate Selvidge <nate.selvidge@braintrustdata.com>
Date: Fri, 13 Mar 2026 22:20:12 +0000
Subject: [PATCH 3/5] fixes

---
 .gitignore                      |  1 +
 scripts/eval-runner.py          | 15 +++++++++++++++
 scripts/functions-runner.py     | 11 ++++++++---
 scripts/python_runner_common.py | 12 ++++++++++++
 src/eval.rs                     | 26 ++++++++------------------
 src/functions/push.rs           |  8 ++++++++
 6 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore
index da0bacf..a0fe928 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ tests/evals/js/eval-bun/test-data.txt
 __pycache__
 
 bt-sync
+*.env
\ No newline at end of file
diff --git a/scripts/eval-runner.py b/scripts/eval-runner.py
index 8742375..e9d3ce0 100755
--- a/scripts/eval-runner.py
+++ b/scripts/eval-runner.py
@@ -435,6 +435,21 @@ def load_evaluators(files: list[str]) -> tuple[list[EvaluatorInstance], dict[str
     cwd = os.getcwd()
     if cwd not in sys.path:
         sys.path.insert(0, cwd)
+
+    # Add the project root inferred from input files to sys.path so that
+    # sibling-package imports work when files live outside CWD (e.g.
+    # sandbox bundles extracted to a temp directory).  Walk up from each
+    # file's directory looking for a register.py (bundle marker) or the
+    # filesystem root, whichever comes first.
+    for f in files:
+        d = os.path.dirname(os.path.abspath(f))
+        while d and d != os.path.dirname(d):
+            if os.path.isfile(os.path.join(d, "register.py")):
+                if d not in sys.path:
+                    sys.path.insert(0, d)
+                break
+            d = os.path.dirname(d)
+
     unique_files: set[str] = set()
     for file_path in files:
         for candidate in collect_files(file_path):
diff --git a/scripts/functions-runner.py b/scripts/functions-runner.py
index a140a19..fa93c75 100644
--- a/scripts/functions-runner.py
+++ b/scripts/functions-runner.py
@@ -29,9 +29,9 @@ def to_json_value(value: Any) -> Any:
         return [to_json_value(item) for item in value]
     if isinstance(value, dict):
         return {str(key): to_json_value(val) for key, val in value.items()}
-    if hasattr(value, "model_dump"):
+    if hasattr(value, "model_dump") and not isinstance(value, type):
         return to_json_value(value.model_dump())
-    if hasattr(value, "dict"):
+    if hasattr(value, "dict") and not isinstance(value, type):
         return to_json_value(value.dict())
     if hasattr(value, "__dict__"):
         result: dict[str, Any] = {}
@@ -451,8 +451,13 @@ async def process_file(file_path: str) -> dict[str, Any]:
                     continue
                 seen_sources.add(init_source)
                 bundled_sources.append(init_source)
+            # Use a CWD-relative dotted entry_module so the archive root that push.rs infers
+            # is CWD; files outside CWD ("../x.py" -> "...x") keep the import module name.
+            rel_path = os.path.relpath(abs_path, cwd)
+            dotted = re.sub(r"\.py$", "", rel_path).replace("-", "_").replace(os.sep, ".")
+            archive_module = module_name if rel_path.startswith(os.pardir + os.sep) else dotted
             file_manifest["python_bundle"] = {
-                "entry_module": module_name,
+                "entry_module": archive_module,
                 "sources": bundled_sources,
             }
 
diff --git a/scripts/python_runner_common.py b/scripts/python_runner_common.py
index 4a738f9..1d83141 100644
--- a/scripts/python_runner_common.py
+++ b/scripts/python_runner_common.py
@@ -61,6 +61,12 @@ def purge_local_modules(cwd: str, preserve_modules: set[str] | None = None) -> N
         candidate_abs = os.path.abspath(candidate)
         if not os.path.isfile(candidate_abs):
             continue
+        # Skip installed packages inside virtualenvs under cwd (e.g. .venv/lib/.../site-packages).
+        if os.sep + "site-packages" + os.sep in candidate_abs:
+            continue
+        # Skip bt runner scripts materialised under .bt/.
+        if os.sep + ".bt" + os.sep in candidate_abs:
+            continue
         try:
             common = os.path.commonpath([candidate_abs, cwd_abs])
         except ValueError:
@@ -84,6 +90,12 @@ def collect_python_sources(cwd: str, input_file: str) -> list[str]:
             continue
         if not candidate_abs.endswith(".py"):
             continue
+        # Skip installed packages inside virtualenvs under cwd (e.g. .venv/lib/.../site-packages).
+        if os.sep + "site-packages" + os.sep in candidate_abs:
+            continue
+        # Skip bt runner scripts materialised under .bt/.
+        if os.sep + ".bt" + os.sep in candidate_abs:
+            continue
         try:
             common = os.path.commonpath([candidate_abs, cwd])
         except ValueError:
diff --git a/src/eval.rs b/src/eval.rs
index d108c61..6bc5d40 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -1207,12 +1207,6 @@ fn serialize_sse_event(event: &str, data: &str) -> String {
     format!("event: {event}\ndata: {data}\n\n")
 }
 
-fn is_eval_progress_payload(progress: &SseProgressEventData) -> bool {
-    serde_json::from_str::<EvalProgressData>(&progress.data)
-        .map(|payload| payload.kind_type == "eval_progress")
-        .unwrap_or(false)
-}
-
 fn encode_eval_event_for_http(event: &EvalEvent) -> Option<String> {
     match event {
         EvalEvent::Processing(payload) => serde_json::to_string(payload)
@@ -1224,15 +1218,9 @@ fn encode_eval_event_for_http(event: &EvalEvent) -> Option<String> {
         EvalEvent::Summary(summary) => serde_json::to_string(summary)
             .ok()
             .map(|data| serialize_sse_event("summary", &data)),
-        EvalEvent::Progress(progress) => {
-            if is_eval_progress_payload(progress) {
-                None
-            } else {
-                serde_json::to_string(progress)
-                    .ok()
-                    .map(|data| serialize_sse_event("progress", &data))
-            }
-        }
+        EvalEvent::Progress(progress) => serde_json::to_string(progress)
+            .ok()
+            .map(|data| serialize_sse_event("progress", &data)),
         EvalEvent::Dependencies { .. } => None,
         EvalEvent::Done => Some(serialize_sse_event("done", "")),
         EvalEvent::Error {
@@ -2188,7 +2176,7 @@ fn build_python_command(
         .or_else(|| std::env::var("BT_EVAL_PYTHON_RUNNER").ok())
         .or_else(|| std::env::var("BT_EVAL_PYTHON").ok());
 
-    let command = if let Some(explicit) = runner_override {
+    let mut command = if let Some(explicit) = runner_override {
         let mut command = Command::new(explicit);
         command.arg(runner).args(files);
         command
@@ -4022,7 +4010,7 @@ mod tests {
     }
 
     #[test]
-    fn encode_eval_event_for_http_filters_internal_eval_progress() {
+    fn encode_eval_event_for_http_forwards_eval_progress() {
         let event = EvalEvent::Progress(SseProgressEventData {
             id: "id-1".to_string(),
             object_type: "task".to_string(),
@@ -4034,7 +4022,9 @@ mod tests {
             data: r#"{"type":"eval_progress","kind":"start","total":1}"#.to_string(),
         });
 
-        assert!(encode_eval_event_for_http(&event).is_none());
+        let encoded = encode_eval_event_for_http(&event).expect("eval_progress should be forwarded");
+        assert!(encoded.contains("event: progress"));
+        assert!(encoded.contains("eval_progress"));
     }
 
     #[test]
diff --git a/src/functions/push.rs b/src/functions/push.rs
index 5d4de74..0ca6109 100644
--- a/src/functions/push.rs
+++ b/src/functions/push.rs
@@ -1125,6 +1125,14 @@ fn collect_classified_files(inputs: &[PathBuf]) -> Result<ClassifiedFiles> {
     let mut explicit_js_like = 0usize;
     let mut explicit_python = 0usize;
 
+    // Always include CWD so that Python files importing from sibling
+    // packages (e.g. `from src.agents import ...`) are accepted.
+    if let Ok(cwd) = std::env::current_dir() {
+        if let Ok(canonical_cwd) = cwd.canonicalize() {
+            allowed_roots.insert(canonical_cwd);
+        }
+    }
+
     for input in inputs {
         let path = if input.is_absolute() {
             input.clone()

From 9628bbc49f6889acda041fdfd869a51dd22893dc Mon Sep 17 00:00:00 2001
From: Nate Selvidge <nate.selvidge@braintrustdata.com>
Date: Mon, 16 Mar 2026 16:11:49 +0000
Subject: [PATCH 4/5] filter internal eval_progress from HTTP SSE; use SDK parameter schema

---
 scripts/functions-runner.py | 10 +++++++++-
 src/eval.rs                 | 24 +++++++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/scripts/functions-runner.py b/scripts/functions-runner.py
index fa93c75..0291a50 100644
--- a/scripts/functions-runner.py
+++ b/scripts/functions-runner.py
@@ -340,7 +340,15 @@ def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dic
                     },
                 }
             else:
-                serialized = to_json_value(raw_params)
+                # Use the braintrust SDK's parameters_to_json_schema when
+                # available so that Pydantic model classes are converted to
+                # proper staticParametersSchema entries (type: "data" with a
+                # JSON Schema) that the UI can parse.
+                try:
+                    from braintrust.parameters import parameters_to_json_schema
+                    serialized = parameters_to_json_schema(raw_params)
+                except Exception:
+                    serialized = to_json_value(raw_params)
                 if serialized is not None:
                     evaluator_definition["parameters"] = serialized
 
diff --git a/src/eval.rs b/src/eval.rs
index 6bc5d40..bf3afd5 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -1218,9 +1218,21 @@ fn encode_eval_event_for_http(event: &EvalEvent) -> Option<String> {
         EvalEvent::Summary(summary) => serde_json::to_string(summary)
             .ok()
             .map(|data| serialize_sse_event("summary", &data)),
-        EvalEvent::Progress(progress) => serde_json::to_string(progress)
-            .ok()
-            .map(|data| serialize_sse_event("progress", &data)),
+        EvalEvent::Progress(progress) => {
+            // Filter out internal eval_progress events (start/increment/stop)
+            // which are used for CLI progress bars but crash the UI stream
+            // parser.  Only forward external progress events (e.g. json_delta).
+            if serde_json::from_str::<EvalProgressData>(&progress.data)
+                .map(|p| p.kind_type == "eval_progress")
+                .unwrap_or(false)
+            {
+                None
+            } else {
+                serde_json::to_string(progress)
+                    .ok()
+                    .map(|data| serialize_sse_event("progress", &data))
+            }
+        }
         EvalEvent::Dependencies { .. } => None,
         EvalEvent::Done => Some(serialize_sse_event("done", "")),
         EvalEvent::Error {
@@ -4010,7 +4022,7 @@ mod tests {
     }
 
     #[test]
-    fn encode_eval_event_for_http_forwards_eval_progress() {
+    fn encode_eval_event_for_http_filters_internal_eval_progress() {
         let event = EvalEvent::Progress(SseProgressEventData {
             id: "id-1".to_string(),
             object_type: "task".to_string(),
@@ -4022,9 +4034,7 @@ mod tests {
             data: r#"{"type":"eval_progress","kind":"start","total":1}"#.to_string(),
         });
 
-        let encoded = encode_eval_event_for_http(&event).expect("eval_progress should be forwarded");
-        assert!(encoded.contains("event: progress"));
-        assert!(encoded.contains("eval_progress"));
+        assert!(encode_eval_event_for_http(&event).is_none());
     }
 
     #[test]

From 1ff2094a12a5b7372349929665c2566b34776908 Mon Sep 17 00:00:00 2001
From: Nate Selvidge <nate.selvidge@braintrustdata.com>
Date: Wed, 18 Mar 2026 21:34:08 +0000
Subject: [PATCH 5/5] fix CI

---
 src/eval.rs              |  2 +-
 src/source_language.rs   |  1 +
 tests/eval_dev_server.rs | 16 ++++++++--------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/eval.rs b/src/eval.rs
index bf3afd5..bb67433 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -2188,7 +2188,7 @@ fn build_python_command(
         .or_else(|| std::env::var("BT_EVAL_PYTHON_RUNNER").ok())
         .or_else(|| std::env::var("BT_EVAL_PYTHON").ok());
 
-    let mut command = if let Some(explicit) = runner_override {
+    let command = if let Some(explicit) = runner_override {
         let mut command = Command::new(explicit);
         command.arg(runner).args(files);
         command
diff --git a/src/source_language.rs b/src/source_language.rs
index 8a1b71f..1bb82bc 100644
--- a/src/source_language.rs
+++ b/src/source_language.rs
@@ -7,6 +7,7 @@ pub enum SourceLanguage {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum JsExtensionProfile {
     FunctionsPush,
+    #[allow(dead_code)]
     Eval,
 }
 
diff --git a/tests/eval_dev_server.rs b/tests/eval_dev_server.rs
index 3921b92..88d6936 100644
--- a/tests/eval_dev_server.rs
+++ b/tests/eval_dev_server.rs
@@ -123,10 +123,10 @@ fn parse_sse_events(body: &str) -> Vec<SseEvent> {
     let mut current_data = Vec::<String>::new();
 
     for line in body.lines() {
-        if line.starts_with("event: ") {
-            current_event = line["event: ".len()..].to_string();
-        } else if line.starts_with("data: ") {
-            current_data.push(line["data: ".len()..].to_string());
+        if let Some(event) = line.strip_prefix("event: ") {
+            current_event = event.to_string();
+        } else if let Some(data) = line.strip_prefix("data: ") {
+            current_data.push(data.to_string());
         } else if line.is_empty() && !current_event.is_empty() {
             events.push(SseEvent {
                 event: std::mem::take(&mut current_event),
@@ -518,10 +518,10 @@ fn streaming_eval_post(
             Ok(l) => l,
             Err(_) => break,
         };
-        if line.starts_with("event: ") {
-            current_event = line["event: ".len()..].to_string();
-        } else if line.starts_with("data: ") {
-            current_data.push(line["data: ".len()..].to_string());
+        if let Some(event) = line.strip_prefix("event: ") {
+            current_event = event.to_string();
+        } else if let Some(data) = line.strip_prefix("data: ") {
+            current_data.push(data.to_string());
         } else if line.is_empty() && !current_event.is_empty() {
             let event = SseEvent {
                 event: std::mem::take(&mut current_event),