From 896134c6a1c3d0c823c48d9701e5e012cf6e950f Mon Sep 17 00:00:00 2001 From: Olmo Maldonado Date: Thu, 12 Mar 2026 14:41:28 -0700 Subject: [PATCH 1/2] Add mixed Python/JS eval tests --- tests/evals/mixed/mixed-py-js/basic.eval.ts | 22 +++++++++++++++++++++ tests/evals/mixed/mixed-py-js/eval_basic.py | 20 +++++++++++++++++++ tests/evals/mixed/mixed-py-js/fixture.json | 5 +++++ tests/evals/mixed/mixed-py-js/package.json | 10 ++++++++++ 4 files changed, 57 insertions(+) create mode 100644 tests/evals/mixed/mixed-py-js/basic.eval.ts create mode 100644 tests/evals/mixed/mixed-py-js/eval_basic.py create mode 100644 tests/evals/mixed/mixed-py-js/fixture.json create mode 100644 tests/evals/mixed/mixed-py-js/package.json diff --git a/tests/evals/mixed/mixed-py-js/basic.eval.ts b/tests/evals/mixed/mixed-py-js/basic.eval.ts new file mode 100644 index 0000000..3974888 --- /dev/null +++ b/tests/evals/mixed/mixed-py-js/basic.eval.ts @@ -0,0 +1,22 @@ +import { Eval } from "braintrust"; + +const exactMatch = ({ + output, + expected, +}: { + output: number; + expected?: number; +}) => ({ + name: "exact_match", + score: output === expected ? 1 : 0, +}); + +Eval("cli-mixed-basic", { + experimentName: "Mixed Basic Test", + data: () => [ + { input: 1, expected: 1 }, + { input: 2, expected: 2 }, + ], + task: async (input: number) => input, + scores: [exactMatch], +}); diff --git a/tests/evals/mixed/mixed-py-js/eval_basic.py b/tests/evals/mixed/mixed-py-js/eval_basic.py new file mode 100644 index 0000000..95b75e8 --- /dev/null +++ b/tests/evals/mixed/mixed-py-js/eval_basic.py @@ -0,0 +1,20 @@ +from braintrust import Eval + + +def data(): + return [ + {"input": 1, "expected": 1}, + {"input": 2, "expected": 2}, + ] + + +def task(value, hooks=None): + return value + + +Eval( + "cli-mixed-basic", + data=data, + task=task, + scores=[], +) diff --git a/tests/evals/mixed/mixed-py-js/fixture.json b/tests/evals/mixed/mixed-py-js/fixture.json new file mode 100644 index 0000000..3dda64c --- /dev/null +++ b/tests/evals/mixed/mixed-py-js/fixture.json @@ -0,0 +1,5 @@ +{ + "files": ["basic.eval.ts", "eval_basic.py"], + "runners_js": ["tsx", "bun", "deno"], + "runners_python": ["python"] +} diff --git a/tests/evals/mixed/mixed-py-js/package.json b/tests/evals/mixed/mixed-py-js/package.json new file mode 100644 index 0000000..1beb641 --- /dev/null +++ b/tests/evals/mixed/mixed-py-js/package.json @@ -0,0 +1,10 @@ +{ + "name": "bt-eval-mixed-py-js", + "private": true, + "dependencies": { + "braintrust": "^2.2.0" + }, + "devDependencies": { + "tsx": "^4.16.2" + } +} From 8fe84573011c46a6fb577a3a5cd529a2e596983a Mon Sep 17 00:00:00 2001 From: Olmo Maldonado Date: Fri, 13 Mar 2026 12:19:40 -0700 Subject: [PATCH 2/2] Infer language and runner from file extensions Replace explicit `--language` and `--runner` flags with automatic detection based on file extensions. Introduce separate `--runner-js` and `--runner-python` flags for explicit runner overrides. Hide the deprecated `--language` flag and maintain backward compatibility for the `BT_EVAL_RUNNER` environment variable via `--runner-js-legacy-env`. Update test fixtures to use `runners_js` and `runners_python` instead of `runtime` and `runners`. --- README.md | 10 +- src/eval.rs | 931 +++++++++++++----- tests/eval_fixtures.rs | 373 +++++-- tests/evals/js/eval-bun/fixture.json | 5 +- tests/evals/js/eval-cjs-monorepo/fixture.json | 2 +- tests/evals/js/eval-deno/fixture.json | 5 +- tests/evals/js/eval-esm-monorepo/fixture.json | 2 +- tests/evals/js/eval-esm/fixture.json | 2 +- tests/evals/js/eval-extra-args/fixture.json | 2 +- tests/evals/js/eval-glob/fixture.json | 2 +- .../js/eval-ts-esm-only-dep/fixture.json | 4 +- tests/evals/js/eval-ts-esm/fixture.json | 4 +- tests/evals/js/eval-ts-local-sdk/fixture.json | 4 +- tests/evals/js/eval-ts-monorepo/fixture.json | 2 +- .../js/eval-ts-vite-monorepo/fixture.json | 2 +- .../evals/js/eval-vite-node-cjs/fixture.json | 2 +- tests/evals/js/eval-vite-node/fixture.json | 4 +- tests/evals/js/eval-vite/fixture.json | 2 +- tests/evals/py/absolute/fixture.json | 1 - tests/evals/py/atexit_flush/fixture.json | 4 +- tests/evals/py/basic/fixture.json | 1 - tests/evals/py/local_import/fixture.json | 1 - tests/evals/py/relative/fixture.json | 1 - tests/evals/py/streaming/fixture.json | 4 +- 24 files changed, 981 insertions(+), 389 deletions(-) diff --git a/README.md b/README.md index 92a972e..bf71b0a 100644 --- a/README.md +++ b/README.md @@ -136,12 +136,14 @@ Files inside `node_modules`, `.venv`, `venv`, `site-packages`, `dist-packages`, **Runners:** - By default, `bt eval` auto-detects a JavaScript runner from your project (`tsx`, `vite-node`, `ts-node`, then `ts-node-esm`). -- Set a runner explicitly with `--runner` / `BT_EVAL_RUNNER`: - - `bt eval --runner vite-node tutorial.eval.ts` - - `bt eval --runner tsx tutorial.eval.ts` +- Set a JavaScript runner explicitly with `--runner-js` / `BT_EVAL_JS_RUNNER`: + - `bt eval --runner-js vite-node tutorial.eval.ts` + - `bt eval --runner-js tsx tutorial.eval.ts` +- Set a Python runner explicitly with `--runner-python` / `BT_EVAL_PYTHON_RUNNER`: + - `bt eval --runner-python python tutorial.eval.py` - `bt` resolves local `node_modules/.bin` entries automatically — no need for a full path. - If eval execution fails with ESM/top-level-await related errors, retry with: - - `bt eval --runner vite-node tutorial.eval.ts` + - `bt eval --runner-js vite-node tutorial.eval.ts` **Passing arguments to the eval file:** diff --git a/src/eval.rs b/src/eval.rs index aba1c34..713665c 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -21,6 +21,7 @@ use crossterm::style::{ Attribute, Color as CtColor, ResetColor, SetAttribute, SetBackgroundColor, SetForegroundColor, Stylize, }; +use futures_util::future; use futures_util::stream; use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle}; use reqwest::Client; @@ -164,8 +165,8 @@ struct ResolvedDatasetEvalData { #[derive(Clone)] struct DevServerState { base: BaseArgs, - language_override: Option, - runner_override: Option, + js_runner_override: Option, + python_runner_override: Option, files: Vec, no_send_logs: bool, options: EvalRunOptions, @@ -231,17 +232,41 @@ pub struct EvalArgs { #[arg(value_name = "FILE")] pub files: Vec, - /// Eval runner binary (e.g. tsx, bun, ts-node, deno, python). Defaults to tsx for JS files. - #[arg(long, short = 'r', env = "BT_EVAL_RUNNER", value_name = "RUNNER")] - pub runner: Option, + /// Runner command for JavaScript/TypeScript eval files (e.g. tsx, "pnpm exec tsx", "node --import tsx/esm"). Defaults to tsx. + #[arg( + long = "runner-js", + alias = "runner", + short = 'r', + env = "BT_EVAL_JS_RUNNER", + value_name = "RUNNER" + )] + pub runner_js: Option, + + // Backward-compat env alias for --runner-js. + #[arg( + long = "runner-js-legacy-env", + env = "BT_EVAL_RUNNER", + value_name = "RUNNER", + hide = true + )] + runner_js_legacy_env: Option, + + /// Runner command for Python eval files (e.g. python3, "uv run python", "poetry run python"). + #[arg( + long = "runner-python", + env = "BT_EVAL_PYTHON_RUNNER", + value_name = "RUNNER" + )] + pub runner_python: Option, - /// Force eval language instead of inferring from file extensions. + /// Deprecated: language is now inferred from file extensions. This flag is ignored. #[arg( long, short = 'l', env = "BT_EVAL_LANGUAGE", value_enum, - value_name = "LANGUAGE" + value_name = "LANGUAGE", + hide = true )] pub language: Option, @@ -350,6 +375,14 @@ pub struct EvalArgs { pub dev_allowed_origin: Vec, } +impl EvalArgs { + fn effective_js_runner(&self) -> Option<&str> { + self.runner_js + .as_deref() + .or(self.runner_js_legacy_env.as_deref()) + } +} + #[derive(Debug, Clone)] struct EvalRunOptions { jsonl: bool, @@ -362,6 +395,9 @@ struct EvalRunOptions { } pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { + if args.language.is_some() { + eprintln!("warning: --language / BT_EVAL_LANGUAGE is deprecated and will be removed in a future release. Language is now inferred from file extensions."); + } if args.dev && args.watch { anyhow::bail!("--watch is not supported with --dev."); } @@ -378,6 +414,7 @@ pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { } } validate_eval_input_files(&files)?; + let js_runner_override = args.effective_js_runner().map(ToOwned::to_owned); let options = EvalRunOptions { jsonl: args.jsonl, @@ -390,12 +427,11 @@ pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { }; if args.dev { - let language = detect_eval_language(&files, args.language)?; let app_url = resolve_app_url(&base); let state = DevServerState { base: base.clone(), - language_override: Some(language), - runner_override: args.runner.clone(), + js_runner_override: js_runner_override.clone(), + python_runner_override: args.runner_python.clone(), files, no_send_logs: args.no_send_logs, options, @@ -415,8 +451,8 @@ pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { if args.watch { run_eval_files_watch( &base, - args.language, - args.runner.as_deref(), + js_runner_override.as_deref(), + args.runner_python.as_deref(), &files, args.no_send_logs, &options, @@ -425,8 +461,8 @@ pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { } else { let output = run_eval_files_once( &base, - args.language, - args.runner.as_deref(), + js_runner_override.as_deref(), + args.runner_python.as_deref(), &files, args.no_send_logs, &options, @@ -441,8 +477,8 @@ pub async fn run(base: BaseArgs, args: EvalArgs) -> Result<()> { async fn run_eval_files_watch( base: &BaseArgs, - language_override: Option, - runner_override: Option<&str>, + js_runner_override: Option<&str>, + python_runner_override: Option<&str>, files: &[String], no_send_logs: bool, options: &EvalRunOptions, @@ -459,8 +495,8 @@ async fn run_eval_files_watch( loop { match run_eval_files_once( base, - language_override, - runner_override, + js_runner_override, + python_runner_override, files, no_send_logs, options, @@ -500,10 +536,10 @@ async fn run_eval_files_watch( } } -struct EvalPlan<'a> { +struct EvalPlan { language: EvalLanguage, - files: &'a [String], - runner_override: Option<&'a str>, + files: Vec, + runner_override: Option, show_js_hint: bool, retry_policy: RetryPolicy, } @@ -516,38 +552,71 @@ struct EvalAttemptOutput { runner_kind: RunnerKind, } -fn build_eval_plan<'a>( - files: &'a [String], - language_override: Option, - runner_override: Option<&'a str>, -) -> Result> { - let language = detect_eval_language(files, language_override)?; - let show_js_hint = language == EvalLanguage::JavaScript && runner_override.is_none(); - let has_ts_files = language == EvalLanguage::JavaScript && has_ts_eval_files(files); - let retry_policy = if show_js_hint && has_ts_files { - RetryPolicy::Allow - } else { - RetryPolicy::Disallow - }; +fn partition_files_by_language(files: &[String]) -> Result)>> { + let mut js_files: Vec = Vec::new(); + let mut py_files: Vec = Vec::new(); + for file in files { + let ext = PathBuf::from(file) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_ascii_lowercase(); + match ext.as_str() { + "py" => py_files.push(file.clone()), + "ts" | "tsx" | "js" | "mjs" | "cjs" => js_files.push(file.clone()), + _ => anyhow::bail!("Unsupported eval file extension: {ext}"), + } + } + let mut result = Vec::new(); + if !js_files.is_empty() { + result.push((EvalLanguage::JavaScript, js_files)); + } + if !py_files.is_empty() { + result.push((EvalLanguage::Python, py_files)); + } + if result.is_empty() { + anyhow::bail!("No eval files provided"); + } + Ok(result) +} - Ok(EvalPlan { - language, - files, - runner_override, - show_js_hint, - retry_policy, - }) +fn build_eval_plans( + files: &[String], + js_runner_override: Option<&str>, + python_runner_override: Option<&str>, +) -> Result> { + let partitions = partition_files_by_language(files)?; + partitions + .into_iter() + .map(|(language, files)| { + let plan_runner = match language { + EvalLanguage::JavaScript => js_runner_override, + EvalLanguage::Python => python_runner_override, + }; + let show_js_hint = language == EvalLanguage::JavaScript && js_runner_override.is_none(); + let has_ts_files = language == EvalLanguage::JavaScript && has_ts_eval_files(&files); + let retry_policy = if show_js_hint && has_ts_files { + RetryPolicy::Allow + } else { + RetryPolicy::Disallow + }; + Ok(EvalPlan { + language, + files, + runner_override: plan_runner.map(ToOwned::to_owned), + show_js_hint, + retry_policy, + }) + }) + .collect() } -async fn run_eval_files_once( +async fn run_eval_plan_once( base: &BaseArgs, - language_override: Option, - runner_override: Option<&str>, - files: &[String], + plan: &EvalPlan, no_send_logs: bool, options: &EvalRunOptions, ) -> Result { - let plan = build_eval_plan(files, language_override, runner_override)?; let console_policy = match plan.retry_policy { RetryPolicy::Allow => ConsolePolicy::BufferStderr, RetryPolicy::Disallow => ConsolePolicy::Forward, @@ -555,7 +624,7 @@ async fn run_eval_files_once( let mut output = run_eval_attempt( base, - &plan, + plan, no_send_logs, options, &[], @@ -564,12 +633,12 @@ async fn run_eval_files_once( ) .await?; - if !output.status.success() && should_retry_esm(&plan, &output) { + if !output.status.success() && should_retry_esm(plan, &output) { let first_attempt_stderr = std::mem::take(&mut output.stderr_lines); eprintln!("Eval failed with ESM/CJS interop error. Retrying in ESM mode..."); output = run_eval_attempt( base, - &plan, + plan, no_send_logs, options, &[], @@ -586,14 +655,14 @@ async fn run_eval_files_once( report_buffered_stderr(&output.stderr_lines, options.verbose); } - if !output.status.success() && plan.show_js_hint && should_retry_esm(&plan, &output) { + if !output.status.success() && plan.show_js_hint && should_retry_esm(plan, &output) { eprintln!("Hint: If this eval uses ESM features (like top-level await), try `--runner vite-node`."); } let mut dependencies = normalize_watch_paths(output.dependency_files.into_iter().map(PathBuf::from))?; if plan.language == EvalLanguage::JavaScript { - let static_dependencies = collect_js_static_dependencies(files)?; + let static_dependencies = collect_js_static_dependencies(&plan.files)?; dependencies = merge_watch_paths(&dependencies, &static_dependencies); } @@ -603,9 +672,43 @@ async fn run_eval_files_once( }) } +async fn run_eval_files_once( + base: &BaseArgs, + js_runner_override: Option<&str>, + python_runner_override: Option<&str>, + files: &[String], + no_send_logs: bool, + options: &EvalRunOptions, +) -> Result { + let plans = build_eval_plans(files, js_runner_override, python_runner_override)?; + let plan_outputs = future::try_join_all( + plans + .iter() + .map(|plan| run_eval_plan_once(base, plan, no_send_logs, options)), + ) + .await?; + + // Aggregate: prefer first failing status; merge all dependency sets. + let mut combined_status: Option = None; + let mut combined_deps: Vec = Vec::new(); + for output in plan_outputs { + combined_deps = merge_watch_paths(&combined_deps, &output.dependencies); + combined_status = Some(match combined_status { + None => output.status, + Some(existing) if existing.success() && !output.status.success() => output.status, + Some(existing) => existing, + }); + } + + Ok(EvalRunOutput { + status: combined_status.expect("build_eval_plans guarantees at least one plan"), + dependencies: combined_deps, + }) +} + async fn run_eval_attempt( base: &BaseArgs, - plan: &EvalPlan<'_>, + plan: &EvalPlan, no_send_logs: bool, options: &EvalRunOptions, extra_env: &[(String, String)], @@ -615,8 +718,8 @@ async fn run_eval_attempt( let spawned = spawn_eval_runner( base, plan.language, - plan.runner_override, - plan.files, + plan.runner_override.as_deref(), + &plan.files, no_send_logs, options, extra_env, @@ -886,7 +989,7 @@ fn report_buffered_stderr(lines: &[String], verbose: bool) { } } -fn should_retry_esm(plan: &EvalPlan<'_>, output: &EvalAttemptOutput) -> bool { +fn should_retry_esm(plan: &EvalPlan, output: &EvalAttemptOutput) -> bool { if matches!(plan.retry_policy, RetryPolicy::Disallow) { return false; } @@ -1324,28 +1427,8 @@ async fn dev_server_list(state: web::Data, req: HttpRequest) -> } }; - let language = match detect_eval_language(&state.files, state.language_override) { - Ok(language) => language, - Err(err) => { - return json_error_response( - actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, - &format!("{err:#}"), - ); - } - }; - let spawned = match spawn_eval_runner( - &state.base, - language, - state.runner_override.as_deref(), - &state.files, - state.no_send_logs, - &state.options, - &extra_env, - JsMode::Auto, - ) - .await - { - Ok(value) => value, + let partitions = match partition_files_by_language(&state.files) { + Ok(p) => p, Err(err) => { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, @@ -1354,27 +1437,63 @@ async fn dev_server_list(state: web::Data, req: HttpRequest) -> } }; - let mut stdout_lines = Vec::new(); - let mut errors: Vec<(String, Option)> = Vec::new(); - let output = - match drive_eval_runner( - spawned.process, - ConsolePolicy::Forward, - |event| match event { - EvalEvent::Console { stream, message } if stream == "stdout" => { - stdout_lines.push(message); - } - EvalEvent::Error { - message, - stack: _, - status, - } => errors.push((message, status)), - _ => {} - }, - ) - .await - { - Ok(output) => output, + // Spawn one runner per language partition and collect their manifests in parallel. + let mut handles = Vec::new(); + for (language, files) in partitions { + let state = state.clone(); + let extra_env = extra_env.clone(); + handles.push(tokio::spawn(async move { + let runner = match language { + EvalLanguage::JavaScript => state.js_runner_override.as_deref(), + EvalLanguage::Python => state.python_runner_override.as_deref(), + }; + let spawned = spawn_eval_runner( + &state.base, + language, + runner, + &files, + state.no_send_logs, + &state.options, + &extra_env, + JsMode::Auto, + ) + .await?; + let mut stdout_lines: Vec = Vec::new(); + let mut errors: Vec<(String, Option)> = Vec::new(); + let output = + drive_eval_runner( + spawned.process, + ConsolePolicy::Forward, + |event| match event { + EvalEvent::Console { stream, message } if stream == "stdout" => { + stdout_lines.push(message); + } + EvalEvent::Error { + message, + stack: _, + status, + } => errors.push((message, status)), + _ => {} + }, + ) + .await?; + Ok::<_, anyhow::Error>((language, stdout_lines, errors, output.status)) + })); + } + + // Collect each partition's manifest alongside its language. + let mut language_manifests: Vec<(EvalLanguage, serde_json::Map)> = Vec::new(); + let mut first_error: Option<(String, Option)> = None; + let mut any_failure = false; + for handle in handles { + let (language, stdout_lines, errors, status) = match handle.await { + Ok(Ok(r)) => r, + Ok(Err(err)) => { + return json_error_response( + actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, + &format!("{err:#}"), + ); + } Err(err) => { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, @@ -1382,41 +1501,74 @@ async fn dev_server_list(state: web::Data, req: HttpRequest) -> ); } }; + if !status.success() { + any_failure = true; + } + if first_error.is_none() { + first_error = errors.into_iter().next(); + } + let mut partition_manifest: Option> = None; + for line in stdout_lines.iter().rev() { + if let Ok(Value::Object(map)) = serde_json::from_str::(line) { + partition_manifest = Some(map); + break; + } + } + if partition_manifest.is_none() { + let joined = stdout_lines.join("\n"); + if let Ok(Value::Object(map)) = serde_json::from_str::(&joined) { + partition_manifest = Some(map); + } + } + if let Some(map) = partition_manifest { + language_manifests.push((language, map)); + } + } - if let Some((message, status)) = errors.first() { - let status = status - .and_then(|status| actix_web::http::StatusCode::from_u16(status).ok()) + if let Some((message, status)) = first_error { + let http_status = status + .and_then(|s| actix_web::http::StatusCode::from_u16(s).ok()) .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR); - return json_error_response(status, message); + return json_error_response(http_status, &message); } - if !output.status.success() { + if any_failure { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, "Eval runner exited with an error.", ); } + if language_manifests.is_empty() { + return json_error_response( + actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, + "Failed to parse evaluator manifest from runner output.", + ); + } - let mut parsed_manifest: Option = None; - for line in stdout_lines.iter().rev() { - if let Ok(value) = serde_json::from_str::(line) { - parsed_manifest = Some(value); - break; + // Merge manifests across partitions. If the same evaluator name appears in + // multiple language partitions, suffix each with the language name so both + // are registered and Braintrust can report them independently. + let mut key_counts: std::collections::HashMap = std::collections::HashMap::new(); + for (_, map) in &language_manifests { + for key in map.keys() { + *key_counts.entry(key.clone()).or_insert(0) += 1; } } - if parsed_manifest.is_none() { - let joined = stdout_lines.join("\n"); - if let Ok(value) = serde_json::from_str::(&joined) { - parsed_manifest = Some(value); + let mut merged_manifest = serde_json::Map::new(); + for (language, map) in language_manifests { + let lang_label = match language { + EvalLanguage::JavaScript => "js", + EvalLanguage::Python => "python", + }; + for (key, value) in map { + let final_key = if key_counts.get(&key).copied().unwrap_or(0) > 1 { + format!("{key} [{lang_label}]") + } else { + key + }; + merged_manifest.insert(final_key, value); } } - - match parsed_manifest { - Some(manifest) => HttpResponse::Ok().json(manifest), - None => json_error_response( - actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, - "Failed to parse evaluator manifest from runner output.", - ), - } + HttpResponse::Ok().json(Value::Object(merged_manifest)) } async fn dev_server_eval( @@ -1451,8 +1603,8 @@ async fn dev_server_eval( } }; - let language = match detect_eval_language(&state.files, state.language_override) { - Ok(language) => language, + let partitions = match partition_files_by_language(&state.files) { + Ok(p) => p, Err(err) => { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, @@ -1460,82 +1612,110 @@ async fn dev_server_eval( ); } }; - let spawned = match spawn_eval_runner( - &state.base, - language, - state.runner_override.as_deref(), - &state.files, - state.no_send_logs, - &state.options, - &extra_env, - JsMode::Auto, - ) - .await - { - Ok(value) => value, - Err(err) => { - return json_error_response( - actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, - &format!("{err:#}"), - ); + let mut spawned_list: Vec = Vec::new(); + for (language, files) in &partitions { + let runner = match language { + EvalLanguage::JavaScript => state.js_runner_override.as_deref(), + EvalLanguage::Python => state.python_runner_override.as_deref(), + }; + match spawn_eval_runner( + &state.base, + *language, + runner, + files, + state.no_send_logs, + &state.options, + &extra_env, + JsMode::Auto, + ) + .await + { + Ok(spawned) => spawned_list.push(spawned), + Err(err) => { + return json_error_response( + actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, + &format!("{err:#}"), + ); + } } - }; + } if stream_requested { let (tx, rx) = mpsc::unbounded_channel::(); - tokio::spawn(async move { - let mut saw_error = false; - let mut stderr_lines: Vec = Vec::new(); - let output = drive_eval_runner(spawned.process, ConsolePolicy::Forward, |event| { - if matches!(event, EvalEvent::Error { .. }) { - saw_error = true; - } - if matches!(event, EvalEvent::Done) { - return; - } - if let EvalEvent::Console { - ref stream, - ref message, - } = event - { - for line in message.lines() { - let _ = tx.send(format!(": [{stream}] {line}\n")); + let mut runner_handles = Vec::new(); + for spawned in spawned_list { + let tx_task = tx.clone(); + runner_handles.push(tokio::spawn(async move { + let mut saw_error = false; + let mut saw_404 = false; + let mut stderr_lines: Vec = Vec::new(); + let output = drive_eval_runner(spawned.process, ConsolePolicy::Forward, |event| { + if let EvalEvent::Error { + status: Some(404), .. + } = &event + { + saw_404 = true; + return; } - if stream == "stderr" { - stderr_lines.push(message.clone()); + if matches!(event, EvalEvent::Error { .. }) { + saw_error = true; } - return; - } - if let Some(encoded) = encode_eval_event_for_http(&event) { - let _ = tx.send(encoded); - } - }) - .await; - - match output { - Ok(output) => { - if !output.status.success() && !saw_error { - let mut detail = format!("Eval runner exited with {}.", output.status); - for line in stderr_lines.iter() { - detail.push('\n'); - detail.push_str(line); + if matches!(event, EvalEvent::Done) { + return; + } + if let EvalEvent::Console { + ref stream, + ref message, + } = event + { + for line in message.lines() { + let _ = tx_task.send(format!(": [{stream}] {line}\n")); + } + if stream == "stderr" { + stderr_lines.push(message.clone()); } - let error = - serialize_sse_event("error", &json!({ "message": detail }).to_string()); - let _ = tx.send(error); + return; + } + if let Some(encoded) = encode_eval_event_for_http(&event) { + let _ = tx_task.send(encoded); + } + }) + .await; + + match output { + Ok(output) => { + if !output.status.success() && !saw_error && !saw_404 { + let mut detail = format!("Eval runner exited with {}.", output.status); + for line in stderr_lines.iter() { + detail.push('\n'); + detail.push_str(line); + } + let error = serialize_sse_event( + "error", + &json!({ "message": detail }).to_string(), + ); + let _ = tx_task.send(error); + } + } + Err(err) => { + let error = serialize_sse_event( + "error", + &json!({ "message": format!("{err:#}") }).to_string(), + ); + let _ = tx_task.send(error); } } - Err(err) => { - let error = serialize_sse_event( - "error", - &json!({ "message": format!("{err:#}") }).to_string(), - ); - let _ = tx.send(error); - } + })); + } + // Coordination task: wait for all runner tasks, then send the SSE "done" event. + let tx_coord = tx.clone(); + tokio::spawn(async move { + for handle in runner_handles { + let _ = handle.await; } - - let _ = tx.send(serialize_sse_event("done", "")); + let _ = tx_coord.send(serialize_sse_event("done", "")); }); + drop(tx); let response_stream = stream::unfold(rx, |mut rx| async { rx.recv() @@ -1549,25 +1729,43 @@ async fn dev_server_eval( .streaming(response_stream); } - let mut summary: Option = None; - let mut errors: Vec<(String, Option)> = Vec::new(); - let output = - match drive_eval_runner( - spawned.process, - ConsolePolicy::Forward, - |event| match event { - EvalEvent::Summary(current) => summary = Some(current), - EvalEvent::Error { - message, - stack: _, - status, - } => errors.push((message, status)), - _ => {} - }, - ) - .await - { - Ok(output) => output, + // Non-streaming: drive all runners in parallel and aggregate results. + let mut non_streaming_handles = Vec::new(); + for spawned in spawned_list { + non_streaming_handles.push(tokio::spawn(async move { + let mut summary: Option = None; + let mut errors: Vec<(String, Option)> = Vec::new(); + let output = + drive_eval_runner( + spawned.process, + ConsolePolicy::Forward, + |event| match event { + EvalEvent::Summary(current) => summary = Some(current), + EvalEvent::Error { + message, + stack: _, + status, + } => errors.push((message, status)), + _ => {} + }, + ) + .await?; + Ok::<_, anyhow::Error>((summary, errors, output.status)) + })); + } + + let mut final_summary: Option = None; + let mut first_real_error: Option<(String, Option)> = None; + let mut any_failure = false; + for handle in non_streaming_handles { + let result = match handle.await { + Ok(Ok(r)) => r, + Ok(Err(err)) => { + return json_error_response( + actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, + &format!("{err:#}"), + ); + } Err(err) => { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, @@ -1575,17 +1773,31 @@ async fn dev_server_eval( ); } }; + let (summary, errors, status) = result; + if !status.success() { + any_failure = true; + } + for (message, error_status) in errors { + // Filter 404s: they come from the runner that does not own this evaluator. + if error_status != Some(404) && first_real_error.is_none() { + first_real_error = Some((message, error_status)); + } + } + if final_summary.is_none() { + final_summary = summary; + } + } - if let Some((message, status)) = errors.first() { - let status = status - .and_then(|status| actix_web::http::StatusCode::from_u16(status).ok()) + if let Some((message, status)) = first_real_error { + let http_status = status + .and_then(|s| actix_web::http::StatusCode::from_u16(s).ok()) .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR); - return json_error_response(status, message); + return json_error_response(http_status, &message); } - if let Some(summary) = summary { + if let Some(summary) = final_summary { return HttpResponse::Ok().json(summary); } - if !output.status.success() { + if any_failure { return json_error_response( actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, "Eval runner exited with an error.", @@ -1900,42 +2112,6 @@ async fn build_env(base: &BaseArgs) -> Result> { Ok(envs) } -fn detect_eval_language( - files: &[String], - language_override: Option, -) -> Result { - if let Some(language) = language_override { - return Ok(language); - } - - let mut detected: Option = None; - for file in files { - let ext = PathBuf::from(file) - .extension() - .and_then(|ext| ext.to_str()) - .unwrap_or("") - .to_ascii_lowercase(); - let current = match ext.as_str() { - "py" => EvalLanguage::Python, - "ts" | "tsx" | "js" | "mjs" | "cjs" => EvalLanguage::JavaScript, - _ => { - anyhow::bail!("Unsupported eval file extension: {ext}"); - } - }; - if let Some(existing) = detected { - if existing != current { - anyhow::bail!( - "Mixed eval file types are not supported yet (found {existing:?} and {current:?})." - ); - } - } else { - detected = Some(current); - } - } - - detected.ok_or_else(|| anyhow::anyhow!("No eval files provided")) -} - const DEFAULT_EVAL_GLOBS: &[&str] = &[ "**/*.eval.ts", "**/*.eval.js", @@ -2182,11 +2358,6 @@ fn build_python_command( runner: &Path, files: &[String], ) -> Result { - let runner_override = runner_override - .map(ToOwned::to_owned) - .or_else(|| std::env::var("BT_EVAL_PYTHON_RUNNER").ok()) - .or_else(|| std::env::var("BT_EVAL_PYTHON").ok()); - let command = if let Some(explicit) = runner_override { let mut command = Command::new(explicit); command.arg(runner).args(files); @@ -4199,7 +4370,7 @@ mod tests { "BT_EVAL_DEV_HOST", "BT_EVAL_DEV_PORT", "BT_EVAL_DEV_ORG_NAME", - ]; + ]; // BT_EVAL_LANGUAGE intentionally omitted — deprecated, accepted but ignored let previous: Vec<(&str, Option)> = keys.iter().map(|key| (*key, clear_env_var(key))).collect(); set_env_var("BT_EVAL_JSONL", "true"); @@ -4238,4 +4409,256 @@ mod tests { restore_env_var(key, value); } } + + #[test] + fn eval_args_accepts_bt_eval_js_runner_env() { + let _guard = env_test_lock() + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let keys = ["BT_EVAL_JS_RUNNER", "BT_EVAL_RUNNER"]; + let previous: Vec<(&str, Option)> = + keys.iter().map(|key| (*key, clear_env_var(key))).collect(); + set_env_var("BT_EVAL_JS_RUNNER", "vite-node"); + + let parsed = EvalArgsHarness::try_parse_from(["bt", "sample.eval.ts"]) + .expect("env vars should parse into eval args"); + assert_eq!(parsed.eval.effective_js_runner(), Some("vite-node")); + + for (key, value) in previous { + restore_env_var(key, value); + } + } + + #[test] + fn eval_args_accepts_legacy_bt_eval_runner_env() { + let _guard = env_test_lock() + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let keys = ["BT_EVAL_JS_RUNNER", "BT_EVAL_RUNNER"]; + let previous: Vec<(&str, Option)> = + keys.iter().map(|key| (*key, clear_env_var(key))).collect(); + set_env_var("BT_EVAL_RUNNER", "tsx"); + + let parsed = EvalArgsHarness::try_parse_from(["bt", "sample.eval.ts"]) + .expect("env vars should parse into eval args"); + assert_eq!(parsed.eval.effective_js_runner(), Some("tsx")); + + for (key, value) in previous { + restore_env_var(key, value); + } + } + + #[test] + fn eval_args_prefers_bt_eval_js_runner_over_legacy_env() { + let _guard = env_test_lock() + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let keys = ["BT_EVAL_JS_RUNNER", "BT_EVAL_RUNNER"]; + let previous: Vec<(&str, Option)> = + keys.iter().map(|key| (*key, clear_env_var(key))).collect(); + set_env_var("BT_EVAL_JS_RUNNER", "vite-node"); + set_env_var("BT_EVAL_RUNNER", "tsx"); + + let parsed = EvalArgsHarness::try_parse_from(["bt", "sample.eval.ts"]) + .expect("env vars should parse into eval args"); + assert_eq!(parsed.eval.effective_js_runner(), Some("vite-node")); + + for (key, value) in previous { + restore_env_var(key, value); + } + } + + #[test] + fn partition_files_by_language_all_js() { + let files = vec![ + "a.eval.ts".to_string(), + "b.eval.js".to_string(), + "c.eval.mjs".to_string(), + ]; + let partitions = partition_files_by_language(&files).expect("should partition"); + assert_eq!(partitions.len(), 1); + assert_eq!(partitions[0].0, EvalLanguage::JavaScript); + assert_eq!(partitions[0].1, files); + } + + #[test] + fn partition_files_by_language_all_python() { + let files = vec!["eval_foo.py".to_string(), "eval_bar.py".to_string()]; + let partitions = partition_files_by_language(&files).expect("should partition"); + assert_eq!(partitions.len(), 1); + assert_eq!(partitions[0].0, EvalLanguage::Python); + assert_eq!(partitions[0].1, files); + } + + #[test] + fn partition_files_by_language_mixed_produces_two_partitions() { + let files = vec![ + "a.eval.ts".to_string(), + "eval_foo.py".to_string(), + "b.eval.js".to_string(), + "eval_bar.py".to_string(), + ]; + let partitions = partition_files_by_language(&files).expect("should partition"); + assert_eq!(partitions.len(), 2); + + let js = partitions + .iter() + .find(|(l, _)| *l == EvalLanguage::JavaScript) + .expect("JS partition should exist"); + assert_eq!(js.1, vec!["a.eval.ts", "b.eval.js"]); + + let py = partitions + .iter() + .find(|(l, _)| *l == EvalLanguage::Python) + .expect("Python partition should exist"); + assert_eq!(py.1, vec!["eval_foo.py", "eval_bar.py"]); + } + + #[test] + fn partition_files_by_language_errors_on_empty_input() { + let err = partition_files_by_language(&[]).expect_err("empty input should fail"); + assert!(err.to_string().contains("No eval files provided")); + } + + #[test] + fn partition_files_by_language_errors_on_unsupported_extension() { + let files = vec!["a.eval.rb".to_string()]; + let err = partition_files_by_language(&files).expect_err("unsupported ext should fail"); + assert!(err.to_string().contains("Unsupported eval file extension")); + assert!(err.to_string().contains("rb")); + } + + #[test] + fn partition_files_by_language_js_partition_comes_before_python() { + let files = vec!["eval_foo.py".to_string(), "a.eval.ts".to_string()]; + let partitions = partition_files_by_language(&files).expect("should partition"); + assert_eq!(partitions.len(), 2); + assert_eq!(partitions[0].0, EvalLanguage::JavaScript); + assert_eq!(partitions[1].0, EvalLanguage::Python); + } + + #[test] + fn build_eval_plans_single_js_file_no_runner_override() { + let files = vec!["a.eval.ts".to_string()]; + let plans = build_eval_plans(&files, None, None).expect("should build plans"); + assert_eq!(plans.len(), 1); + let plan = &plans[0]; + assert_eq!(plan.language, EvalLanguage::JavaScript); + assert_eq!(plan.files, files); + assert!(plan.runner_override.is_none()); + assert!(plan.show_js_hint); + assert!(matches!(plan.retry_policy, RetryPolicy::Allow)); + } + + #[test] + fn build_eval_plans_js_only_non_ts_disables_retry() { + let files = vec!["a.eval.js".to_string()]; + let plans = build_eval_plans(&files, None, None).expect("should build plans"); + assert_eq!(plans.len(), 1); + assert!(plans[0].show_js_hint, "hint should still show for plain JS"); + assert!( + matches!(plans[0].retry_policy, RetryPolicy::Disallow), + "no TS files means no ESM retry" + ); + } + + #[test] + fn build_eval_plans_runner_override_clears_js_hint() { + let files = vec!["a.eval.ts".to_string()]; + let plans = build_eval_plans(&files, Some("vite-node"), None).expect("should build plans"); + assert_eq!(plans.len(), 1); + let plan = &plans[0]; + assert!(!plan.show_js_hint); + assert_eq!(plan.runner_override.as_deref(), Some("vite-node")); + } + + #[test] + fn build_eval_plans_single_python_file() { + let files = vec!["eval_foo.py".to_string()]; + let plans = build_eval_plans(&files, None, None).expect("should build plans"); + assert_eq!(plans.len(), 1); + let plan = &plans[0]; + assert_eq!(plan.language, EvalLanguage::Python); + assert!(!plan.show_js_hint); + assert!(matches!(plan.retry_policy, RetryPolicy::Disallow)); + } + + #[test] + fn build_eval_plans_mixed_produces_two_plans() { + let files = vec!["a.eval.ts".to_string(), "eval_foo.py".to_string()]; + let plans = build_eval_plans(&files, None, None).expect("should build plans"); + assert_eq!(plans.len(), 2); + + let js_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::JavaScript) + .expect("JS plan should exist"); + assert_eq!(js_plan.files, vec!["a.eval.ts"]); + assert!(js_plan.show_js_hint); + + let py_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::Python) + .expect("Python plan should exist"); + assert_eq!(py_plan.files, vec!["eval_foo.py"]); + assert!(!py_plan.show_js_hint); + } + + #[test] + fn build_eval_plans_runner_override_routes_by_language() { + let files = vec!["a.eval.ts".to_string(), "eval_foo.py".to_string()]; + let plans = build_eval_plans(&files, Some("my-js-runner"), Some("my-py-runner")) + .expect("should build plans"); + assert_eq!(plans.len(), 2); + + let js_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::JavaScript) + .expect("JS plan should exist"); + assert_eq!(js_plan.runner_override.as_deref(), Some("my-js-runner")); + + let py_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::Python) + .expect("Python plan should exist"); + assert_eq!(py_plan.runner_override.as_deref(), Some("my-py-runner")); + } + + #[test] + fn build_eval_plans_js_runner_override_does_not_affect_python_plan() { + let files = vec!["a.eval.ts".to_string(), "eval_foo.py".to_string()]; + let plans = build_eval_plans(&files, Some("my-runner"), None).expect("should build plans"); + assert_eq!(plans.len(), 2); + + let js_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::JavaScript) + .expect("JS plan should exist"); + assert_eq!(js_plan.runner_override.as_deref(), Some("my-runner")); + + let py_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::Python) + .expect("Python plan should exist"); + assert!(py_plan.runner_override.is_none()); + } + + #[test] + fn build_eval_plans_python_runner_override_does_not_affect_js_plan() { + let files = vec!["a.eval.ts".to_string(), "eval_foo.py".to_string()]; + let plans = build_eval_plans(&files, None, Some("my-python")).expect("should build plans"); + assert_eq!(plans.len(), 2); + + let js_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::JavaScript) + .expect("JS plan should exist"); + assert!(js_plan.runner_override.is_none()); + + let py_plan = plans + .iter() + .find(|p| p.language == EvalLanguage::Python) + .expect("Python plan should exist"); + assert_eq!(py_plan.runner_override.as_deref(), Some("my-python")); + } } diff --git a/tests/eval_fixtures.rs b/tests/eval_fixtures.rs index 84c63a6..e165d25 100644 --- a/tests/eval_fixtures.rs +++ b/tests/eval_fixtures.rs @@ -13,9 +13,9 @@ use serde_json::Value; #[derive(Debug, Deserialize, Clone)] struct FixtureConfig { files: Vec, - runtime: Option, - runner: Option, - runners: Option>, + + runners_js: Option>, + runners_python: Option>, env: Option>, args: Option>, /// Args appended after the file list, e.g. `["--", "--description", "foo"]` @@ -55,13 +55,14 @@ fn eval_fixtures() { }; let mut fixture_dirs: Vec = Vec::new(); - for runtime_dir in ["js", "py"] { - let root_dir = fixtures_root.join(runtime_dir); - if !root_dir.exists() { - continue; - } - let mut dirs: Vec = fs::read_dir(&root_dir) - .expect("read fixtures dir") + for category_entry in fs::read_dir(&fixtures_root) + .expect("read evals root dir") + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.is_dir()) + { + let mut dirs: Vec = fs::read_dir(&category_entry) + .expect("read fixtures category dir") .filter_map(|entry| entry.ok()) .map(|entry| entry.path()) .filter(|path| path.is_dir()) @@ -85,22 +86,23 @@ fn eval_fixtures() { panic!("Fixture {fixture_name} has no files configured."); } - let runtime = config.runtime.as_deref().unwrap_or("node"); + let has_js = config_has_js_files(&config); + let has_python = config_has_python_files(&config); + let is_mixed = has_js && has_python; + if let Some(selected) = selected_runtimes.as_ref() { - if !selected.contains(runtime) { - eprintln!("Skipping {fixture_name} (runtime {runtime} filtered out)."); + let fixture_runtimes = config_fixture_runtimes(&config); + if fixture_runtimes.is_disjoint(selected) { + eprintln!("Skipping {fixture_name} (no matching runtimes)."); continue; } } - match runtime { - "node" => ensure_dependencies(&dir), - "bun" => ensure_dependencies(&dir), - "deno" => ensure_dependencies(&dir), - "python" => {} - other => panic!("Unsupported runtime for fixture {fixture_name}: {other}"), + + if has_js { + ensure_dependencies(&dir); } - let python_runner = if runtime == "python" { + let python_runner = if has_python { match ensure_python_env(&fixtures_root.join("py")) { Some(python) => Some(python), None => { @@ -117,81 +119,182 @@ fn eval_fixtures() { None }; - let runners = collect_runners(&config); let mut ran_variant = false; - for runner in runners { - if needs_bun(runtime, runner.as_deref()) && !command_exists("bun") { - if required_runtimes().contains("bun") { - panic!("Bun runtime is required but unavailable for fixture {fixture_name}"); + if is_mixed { + let variants = collect_mixed_runner_variants(&config); + for (js_runner, py_runner) in &variants { + if needs_bun(js_runner.as_deref()) && !command_exists("bun") { + if required_runtimes().contains("bun") { + panic!( + "Bun runtime is required but unavailable for fixture {fixture_name}" + ); + } + let label = js_runner.as_deref().unwrap_or("default"); + eprintln!("Skipping {fixture_name} [js={label}] (bun not installed)."); + continue; } - let label = runner.as_deref().unwrap_or("default"); - eprintln!("Skipping {fixture_name} [{label}] (bun not installed)."); - continue; - } - if needs_deno(runtime, runner.as_deref()) && !command_exists("deno") { - if required_runtimes().contains("deno") { - panic!("Deno runtime is required but unavailable for fixture {fixture_name}"); + if needs_deno(js_runner.as_deref()) && !command_exists("deno") { + if required_runtimes().contains("deno") { + panic!( + "Deno runtime is required but unavailable for fixture {fixture_name}" + ); + } + let label = js_runner.as_deref().unwrap_or("default"); + eprintln!("Skipping {fixture_name} [js={label}] (deno not installed)."); + continue; } - let label = runner.as_deref().unwrap_or("default"); - eprintln!("Skipping {fixture_name} [{label}] (deno not installed)."); - continue; - } - - let mut cmd = Command::new(&bt_path); - cmd.arg("eval"); - if let Some(args) = config.args.as_ref() { - cmd.args(args); - } - let resolved_runner = resolve_runner(&dir, runner.as_deref(), python_runner.as_ref()); - if let Some(runner_cmd) = resolved_runner.as_ref() { - cmd.arg("--runner").arg(runner_cmd); - } - cmd.args(&config.files); - if let Some(trailing_args) = config.trailing_args.as_ref() { - cmd.args(trailing_args); - } - cmd.current_dir(&dir); - cmd.env("BT_EVAL_LOCAL", "1"); - cmd.env( - "BRAINTRUST_API_KEY", - std::env::var("BRAINTRUST_API_KEY").unwrap_or_else(|_| "local".to_string()), - ); - if let Some(env) = config.env.as_ref() { - for (key, value) in env { - cmd.env(key, value); + let mut cmd = Command::new(&bt_path); + cmd.arg("eval"); + if let Some(args) = config.args.as_ref() { + cmd.args(args); } - } - - if runner.is_some() { - if let Some(tsx_path) = local_tsx_path(&dir) { - cmd.env("BT_EVAL_RUNNER", tsx_path); + if let Some(js_cmd) = js_runner.as_ref() { + if let Some(resolved) = resolve_runner(&dir, Some(js_cmd.as_str()), None) { + cmd.arg("--runner-js").arg(resolved); + } + } else if let Some(tsx_path) = local_tsx_path(&dir) { + cmd.arg("--runner-js").arg(tsx_path); } + if let Some(py_cmd) = py_runner.as_ref() { + if let Some(resolved) = + resolve_runner(&dir, Some(py_cmd.as_str()), python_runner.as_ref()) + { + cmd.arg("--runner-python").arg(resolved); + } + } else if let Some(python) = python_runner.as_ref() { + cmd.arg("--runner-python").arg(python); + } + cmd.args(&config.files); + if let Some(trailing_args) = config.trailing_args.as_ref() { + cmd.args(trailing_args); + } + cmd.current_dir(&dir); + cmd.env("BT_EVAL_LOCAL", "1"); + cmd.env( + "BRAINTRUST_API_KEY", + std::env::var("BRAINTRUST_API_KEY").unwrap_or_else(|_| "local".to_string()), + ); + if let Some(env) = config.env.as_ref() { + for (key, value) in env { + cmd.env(key, value); + } + } + let label = format!( + "js={}/py={}", + js_runner.as_deref().unwrap_or("default"), + py_runner.as_deref().unwrap_or("default"), + ); + let expect_success = config.expect_success.unwrap_or(true); + let output = cmd.output().expect("run bt eval"); + let status = output.status; + if status.success() != expect_success { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + panic!( + "Fixture {fixture_name} [{label}] had status {status} (expected success={expect_success})\nstdout:\n{stdout}\nstderr:\n{stderr}" + ); + } + ran_variant = true; } + } else { + let raw_runners = if has_python { + config.runners_python.clone().unwrap_or_default() + } else { + config.runners_js.clone().unwrap_or_default() + }; + let runners: Vec> = if raw_runners.is_empty() { + vec![None] + } else { + raw_runners + .into_iter() + .map(|r| if r == "default" { None } else { Some(r) }) + .collect() + }; + for runner in runners { + if needs_bun(runner.as_deref()) && !command_exists("bun") { + if required_runtimes().contains("bun") { + panic!( + "Bun runtime is required but unavailable for fixture {fixture_name}" + ); + } + let label = runner.as_deref().unwrap_or("default"); + eprintln!("Skipping {fixture_name} [{label}] (bun not installed)."); + continue; + } + if needs_deno(runner.as_deref()) && !command_exists("deno") { + if required_runtimes().contains("deno") { + panic!( + "Deno runtime is required but unavailable for fixture {fixture_name}" + ); + } + let label = runner.as_deref().unwrap_or("default"); + eprintln!("Skipping {fixture_name} [{label}] (deno not installed)."); + continue; + } - if let Some(python) = python_runner.as_ref() { - cmd.env("BT_EVAL_PYTHON_RUNNER", python); - } - - let expect_success = config.expect_success.unwrap_or(true); - let output = cmd.output().expect("run bt eval"); - let status = output.status; - if status.success() != expect_success { - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - let deno_diagnostics = - if needs_deno(runtime, resolved_runner.as_deref()) && expect_success { - collect_deno_eval_diagnostics(&dir, &config.files) + let mut cmd = Command::new(&bt_path); + cmd.arg("eval"); + if let Some(args) = config.args.as_ref() { + cmd.args(args); + } + let resolved_runner = + resolve_runner(&dir, runner.as_deref(), python_runner.as_ref()); + if let Some(runner_cmd) = resolved_runner.as_ref() { + let runner_flag = if has_python { + "--runner-python" } else { - None + "--runner-js" }; - panic!( - "Fixture {fixture_name} [{}] had status {status} (expected success={expect_success})\nstdout:\n{stdout}\nstderr:\n{stderr}{}", - deno_diagnostics.unwrap_or_default(), - runner.as_deref().unwrap_or("default") + cmd.arg(runner_flag).arg(runner_cmd); + } + cmd.args(&config.files); + if let Some(trailing_args) = config.trailing_args.as_ref() { + cmd.args(trailing_args); + } + cmd.current_dir(&dir); + cmd.env("BT_EVAL_LOCAL", "1"); + cmd.env( + "BRAINTRUST_API_KEY", + std::env::var("BRAINTRUST_API_KEY").unwrap_or_else(|_| "local".to_string()), ); + + if let Some(env) = config.env.as_ref() { + for (key, value) in env { + cmd.env(key, value); + } + } + + if runner.is_some() { + if let Some(tsx_path) = local_tsx_path(&dir) { + cmd.env("BT_EVAL_RUNNER", tsx_path); + } + } + + if let Some(python) = python_runner.as_ref() { + cmd.env("BT_EVAL_PYTHON_RUNNER", python); + } + + let expect_success = config.expect_success.unwrap_or(true); + let output = cmd.output().expect("run bt eval"); + let status = output.status; + if status.success() != expect_success { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let deno_diagnostics = + if needs_deno(resolved_runner.as_deref()) && expect_success { + collect_deno_eval_diagnostics(&dir, &config.files) + } else { + None + }; + panic!( + "Fixture {fixture_name} [{}] had status {status} (expected success={expect_success})\nstdout:\n{stdout}\nstderr:\n{stderr}{}", + deno_diagnostics.unwrap_or_default(), + runner.as_deref().unwrap_or("default") + ); + } + ran_variant = true; } - ran_variant = true; } if !ran_variant { @@ -227,6 +330,7 @@ fn eval_watch_js_dependency_retriggers() { &bt_path, &fixture_dir, &runner, + false, "tests/async-import.eval.ts", "tests/helper.js", ); @@ -254,6 +358,7 @@ fn eval_watch_bun_dependency_retriggers() { &bt_path, &fixture_dir, "bun", + false, "tests/async-import.eval.ts", "tests/helper.js", ); @@ -281,6 +386,7 @@ fn eval_watch_deno_dependency_retriggers() { &bt_path, &fixture_dir, "deno", + false, "tests/basic.eval.ts", "tests/helper.ts", ); @@ -311,6 +417,7 @@ fn eval_watch_python_dependency_retriggers() { &bt_path, &fixture_dir, python.to_string_lossy().as_ref(), + true, "eval_local_import.py", "helper.py", ); @@ -513,6 +620,7 @@ fn assert_watch_detects_dependency_change( bt_path: &Path, fixture_dir: &Path, runner: &str, + is_python: bool, entry_file: &str, dependency_file: &str, ) { @@ -520,10 +628,15 @@ fn assert_watch_detects_dependency_change( let _restore_guard = FileRestoreGuard::new(dep_path.clone()); let mut cmd = Command::new(bt_path); + let runner_flag = if is_python { + "--runner-python" + } else { + "--runner-js" + }; cmd.arg("eval") .arg("--watch") .arg("--no-send-logs") - .arg("--runner") + .arg(runner_flag) .arg(runner) .arg(entry_file) .current_dir(fixture_dir) @@ -643,21 +756,27 @@ fn wait_for_output( } } -fn collect_runners(config: &FixtureConfig) -> Vec> { - if let Some(runners) = config.runners.as_ref() { - return runners - .iter() - .map(|value| { - if value == "default" { - None - } else { - Some(value.clone()) - } - }) - .collect(); +fn collect_mixed_runner_variants(config: &FixtureConfig) -> Vec<(Option, Option)> { + let default = vec!["default".to_string()]; + let js_runners = config.runners_js.as_deref().unwrap_or(&default); + let py_runners = config.runners_python.as_deref().unwrap_or(&default); + let mut variants = Vec::new(); + for js in js_runners { + for py in py_runners { + let js_val = if js == "default" { + None + } else { + Some(js.clone()) + }; + let py_val = if py == "default" { + None + } else { + Some(py.clone()) + }; + variants.push((js_val, py_val)); + } } - - vec![config.runner.clone()] + variants } fn resolve_runner(dir: &Path, runner: Option<&str>, python: Option<&PathBuf>) -> Option { @@ -681,12 +800,62 @@ fn local_tsx_path(dir: &Path) -> Option { tsx_path.is_file().then_some(tsx_path) } -fn needs_bun(runtime: &str, runner: Option<&str>) -> bool { - runtime == "bun" || runner == Some("bun") +fn needs_bun(runner: Option<&str>) -> bool { + runner == Some("bun") +} + +fn needs_deno(runner: Option<&str>) -> bool { + runner == Some("deno") +} + +fn config_has_js_files(config: &FixtureConfig) -> bool { + config.files.iter().any(|f| { + matches!( + Path::new(f) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""), + "ts" | "tsx" | "js" | "mjs" | "cjs" + ) + }) +} + +fn config_has_python_files(config: &FixtureConfig) -> bool { + config.files.iter().any(|f| { + Path::new(f) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + == "py" + }) } -fn needs_deno(runtime: &str, runner: Option<&str>) -> bool { - runtime == "deno" || runner == Some("deno") +fn config_fixture_runtimes(config: &FixtureConfig) -> BTreeSet { + let mut runtimes = BTreeSet::new(); + if config_has_python_files(config) { + runtimes.insert("python".to_string()); + } + if config_has_js_files(config) { + let js_runners: Vec = config.runners_js.as_deref().unwrap_or(&[]).to_vec(); + if js_runners.is_empty() { + runtimes.insert("node".to_string()); + } else { + for r in &js_runners { + match r.as_str() { + "bun" => { + runtimes.insert("bun".to_string()); + } + "deno" => { + runtimes.insert("deno".to_string()); + } + _ => { + runtimes.insert("node".to_string()); + } + } + } + } + } + runtimes } fn required_runtimes() -> BTreeSet { diff --git a/tests/evals/js/eval-bun/fixture.json b/tests/evals/js/eval-bun/fixture.json index cd4c78c..3a0f4a0 100644 --- a/tests/evals/js/eval-bun/fixture.json +++ b/tests/evals/js/eval-bun/fixture.json @@ -1,5 +1,4 @@ { - "runtime": "bun", - "runner": "bun", - "files": ["tests/bun-imports.eval.ts", "tests/bun-file-api.eval.ts"] + "files": ["tests/bun-imports.eval.ts", "tests/bun-file-api.eval.ts"], + "runners_js": ["bun"] } diff --git a/tests/evals/js/eval-cjs-monorepo/fixture.json b/tests/evals/js/eval-cjs-monorepo/fixture.json index 64dc73e..2707f7b 100644 --- a/tests/evals/js/eval-cjs-monorepo/fixture.json +++ b/tests/evals/js/eval-cjs-monorepo/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.cjs"], - "runners": ["tsx", "vite-node", "bun", "deno"] + "runners_js": ["tsx", "vite-node", "bun", "deno"] } diff --git a/tests/evals/js/eval-deno/fixture.json b/tests/evals/js/eval-deno/fixture.json index 65802f3..373ed54 100644 --- a/tests/evals/js/eval-deno/fixture.json +++ b/tests/evals/js/eval-deno/fixture.json @@ -1,5 +1,4 @@ { - "runtime": "deno", - "runner": "deno", - "files": ["tests/basic.eval.ts"] + "files": ["tests/basic.eval.ts"], + "runners_js": ["deno"] } diff --git a/tests/evals/js/eval-esm-monorepo/fixture.json b/tests/evals/js/eval-esm-monorepo/fixture.json index e54c5e0..76fa6a8 100644 --- a/tests/evals/js/eval-esm-monorepo/fixture.json +++ b/tests/evals/js/eval-esm-monorepo/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.mjs"], - "runners": ["tsx", "vite-node", "bun", "deno"] + "runners_js": ["tsx", "vite-node", "bun", "deno"] } diff --git a/tests/evals/js/eval-esm/fixture.json b/tests/evals/js/eval-esm/fixture.json index 00a8b62..461c7b9 100644 --- a/tests/evals/js/eval-esm/fixture.json +++ b/tests/evals/js/eval-esm/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.mjs", "tests/top-level-await.eval.mjs"], - "runners": ["tsx", "vite-node", "bun", "deno"] + "runners_js": ["tsx", "vite-node", "bun", "deno"] } diff --git a/tests/evals/js/eval-extra-args/fixture.json b/tests/evals/js/eval-extra-args/fixture.json index 9ac1c1d..0819d24 100644 --- a/tests/evals/js/eval-extra-args/fixture.json +++ b/tests/evals/js/eval-extra-args/fixture.json @@ -1,5 +1,5 @@ { "files": ["tests/extra-args.eval.mjs"], "trailing_args": ["--", "--description", "test-desc", "--shard=1/4"], - "runners": ["tsx"] + "runners_js": ["tsx"] } diff --git a/tests/evals/js/eval-glob/fixture.json b/tests/evals/js/eval-glob/fixture.json index 66091d1..2528b6c 100644 --- a/tests/evals/js/eval-glob/fixture.json +++ b/tests/evals/js/eval-glob/fixture.json @@ -1,4 +1,4 @@ { "files": [".", "tests/**/*.eval.mjs"], - "runners": ["tsx"] + "runners_js": ["tsx"] } diff --git a/tests/evals/js/eval-ts-esm-only-dep/fixture.json b/tests/evals/js/eval-ts-esm-only-dep/fixture.json index 96e4cb3..597eee8 100644 --- a/tests/evals/js/eval-ts-esm-only-dep/fixture.json +++ b/tests/evals/js/eval-ts-esm-only-dep/fixture.json @@ -1,5 +1,5 @@ { - "runners": ["default"], "files": ["tests/basic.eval.ts"], - "expect_success": true + "expect_success": true, + "runners_js": ["default"] } diff --git a/tests/evals/js/eval-ts-esm/fixture.json b/tests/evals/js/eval-ts-esm/fixture.json index 5ed3516..1ea5754 100644 --- a/tests/evals/js/eval-ts-esm/fixture.json +++ b/tests/evals/js/eval-ts-esm/fixture.json @@ -1,4 +1,4 @@ { - "runners": ["tsx", "bun"], - "files": ["tests/basic.eval.ts", "tests/top-level-await.eval.ts"] + "files": ["tests/basic.eval.ts", "tests/top-level-await.eval.ts"], + "runners_js": ["tsx", "bun"] } diff --git a/tests/evals/js/eval-ts-local-sdk/fixture.json b/tests/evals/js/eval-ts-local-sdk/fixture.json index 96e4cb3..597eee8 100644 --- a/tests/evals/js/eval-ts-local-sdk/fixture.json +++ b/tests/evals/js/eval-ts-local-sdk/fixture.json @@ -1,5 +1,5 @@ { - "runners": ["default"], "files": ["tests/basic.eval.ts"], - "expect_success": true + "expect_success": true, + "runners_js": ["default"] } diff --git a/tests/evals/js/eval-ts-monorepo/fixture.json b/tests/evals/js/eval-ts-monorepo/fixture.json index bda881f..a001dfd 100644 --- a/tests/evals/js/eval-ts-monorepo/fixture.json +++ b/tests/evals/js/eval-ts-monorepo/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.ts"], - "runners": ["tsx", "vite-node", "bun"] + "runners_js": ["tsx", "vite-node", "bun"] } diff --git a/tests/evals/js/eval-ts-vite-monorepo/fixture.json b/tests/evals/js/eval-ts-vite-monorepo/fixture.json index 4e6a7c7..3348937 100644 --- a/tests/evals/js/eval-ts-vite-monorepo/fixture.json +++ b/tests/evals/js/eval-ts-vite-monorepo/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.ts"], - "runners": ["vite-node"] + "runners_js": ["vite-node"] } diff --git a/tests/evals/js/eval-vite-node-cjs/fixture.json b/tests/evals/js/eval-vite-node-cjs/fixture.json index 4e6a7c7..3348937 100644 --- a/tests/evals/js/eval-vite-node-cjs/fixture.json +++ b/tests/evals/js/eval-vite-node-cjs/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.ts"], - "runners": ["vite-node"] + "runners_js": ["vite-node"] } diff --git a/tests/evals/js/eval-vite-node/fixture.json b/tests/evals/js/eval-vite-node/fixture.json index c78ec7a..20efc8f 100644 --- a/tests/evals/js/eval-vite-node/fixture.json +++ b/tests/evals/js/eval-vite-node/fixture.json @@ -1,4 +1,4 @@ { - "runner": "./node_modules/.bin/vite-node", - "files": ["tests/basic.eval.ts", "tests/top-level-await.eval.ts"] + "files": ["tests/basic.eval.ts", "tests/top-level-await.eval.ts"], + "runners_js": ["./node_modules/.bin/vite-node"] } diff --git a/tests/evals/js/eval-vite/fixture.json b/tests/evals/js/eval-vite/fixture.json index 4e6a7c7..3348937 100644 --- a/tests/evals/js/eval-vite/fixture.json +++ b/tests/evals/js/eval-vite/fixture.json @@ -1,4 +1,4 @@ { "files": ["tests/basic.eval.ts"], - "runners": ["vite-node"] + "runners_js": ["vite-node"] } diff --git a/tests/evals/py/absolute/fixture.json b/tests/evals/py/absolute/fixture.json index 204b039..f2457a3 100644 --- a/tests/evals/py/absolute/fixture.json +++ b/tests/evals/py/absolute/fixture.json @@ -1,4 +1,3 @@ { - "runtime": "python", "files": ["pkg/eval_absolute.py"] } diff --git a/tests/evals/py/atexit_flush/fixture.json b/tests/evals/py/atexit_flush/fixture.json index e840f9c..0c9f642 100644 --- a/tests/evals/py/atexit_flush/fixture.json +++ b/tests/evals/py/atexit_flush/fixture.json @@ -1 +1,3 @@ -{ "runtime": "python", "files": ["eval_atexit.py"] } +{ + "files": ["eval_atexit.py"] +} diff --git a/tests/evals/py/basic/fixture.json b/tests/evals/py/basic/fixture.json index 222d4ed..62b0316 100644 --- a/tests/evals/py/basic/fixture.json +++ b/tests/evals/py/basic/fixture.json @@ -1,4 +1,3 @@ { - "runtime": "python", "files": ["eval_basic.py"] } diff --git a/tests/evals/py/local_import/fixture.json b/tests/evals/py/local_import/fixture.json index 8270680..69309af 100644 --- a/tests/evals/py/local_import/fixture.json +++ b/tests/evals/py/local_import/fixture.json @@ -1,4 +1,3 @@ { - "runtime": "python", "files": ["eval_local_import.py"] } diff --git a/tests/evals/py/relative/fixture.json b/tests/evals/py/relative/fixture.json index 9f525f3..b0e4c19 100644 --- a/tests/evals/py/relative/fixture.json +++ b/tests/evals/py/relative/fixture.json @@ -1,4 +1,3 @@ { - "runtime": "python", "files": ["pkg/eval_relative.py"] } diff --git a/tests/evals/py/streaming/fixture.json b/tests/evals/py/streaming/fixture.json index 11cc488..619500c 100644 --- a/tests/evals/py/streaming/fixture.json +++ b/tests/evals/py/streaming/fixture.json @@ -1 +1,3 @@ -{ "runtime": "python", "files": ["eval_streaming.py"] } +{ + "files": ["eval_streaming.py"] +}