From fcae7d7b7797a74f5f72226b63ef076e3cf1617c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Fri, 20 Mar 2026 23:39:31 +0000 Subject: [PATCH 1/8] chore:remove dead code --- py/src/braintrust/framework.py | 4 ++-- py/src/braintrust/logger.py | 5 ++--- .../braintrust/wrappers/agno/_test_agno_helpers.py | 14 +++++++------- .../wrappers/claude_agent_sdk/_test_transport.py | 4 ++-- .../wrappers/test_pydantic_ai_integration.py | 4 +--- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index d80fb1f9..05a92ff6 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -203,7 +203,7 @@ def tags(self) -> Sequence[str]: """ @abc.abstractmethod - def report_progress(self, progress: TaskProgressEvent) -> None: + def report_progress(self, _progress: TaskProgressEvent) -> None: """ Report progress that will show up in the playground. """ @@ -459,7 +459,7 @@ class EvalResultWithSummary(SerializableDataClass, Generic[Input, Output]): summary: ExperimentSummary results: list[EvalResult[Input, Output]] - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p, _cycle): p.text(f'EvalResultWithSummary(summary="...", results=[...])') diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index a9ba479b..54e45d84 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -1437,7 +1437,7 @@ def _register_dropped_item_count(self, num_items): self._queue_drop_logging_state["last_logged_timestamp"] = time_now @staticmethod - def _write_payload_to_dir(payload_dir, payload, debug_logging_adjective=None): + def _write_payload_to_dir(payload_dir, payload): payload_file = os.path.join(payload_dir, f"payload_{time.time()}_{str(uuid.uuid4())[:8]}.json") try: os.makedirs(payload_dir, exist_ok=True) @@ -2831,7 +2831,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any]) # Note that this only checks properties that are expected of a complete event. # _validate_and_sanitize_experiment_log_partial_args should still be invoked # (after handling special fields like 'id'). -def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any], has_dataset: bool) -> Mapping[str, Any]: +def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any]) -> Mapping[str, Any]: input = event.get("input") inputs = event.get("inputs") if (input is not None and inputs is not None) or (input is None and inputs is None): @@ -3861,7 +3861,6 @@ def log( metrics=metrics, id=id, ), - self.dataset is not None, ) span = self._start_span_impl(start_time=self.last_start_time, lookup_span_parent=False, **event) self.last_start_time = span.end() diff --git a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py index fcb926e1..00fa2ce0 100644 --- a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py +++ b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py @@ -56,10 +56,10 @@ def __init__(self): self.name = name self.steps = ["first-step"] - async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None): + async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None): return FakeWorkflowRunResponse(input=execution_input.input, content="workflow-async") - def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None): + def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None): yield FakeEvent("WorkflowStarted", content=None) yield FakeEvent("StepStarted", content=None) yield FakeEvent("StepCompleted", content="hello ") @@ -74,7 +74,7 @@ def __init__(self): self.name = name self.steps = ["first-step"] - def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None): + def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None): yield FakeEvent("StepCompleted", content="hello") yield FakeEvent("WorkflowCompleted", content="hello", metrics=FakeMetrics(), status="COMPLETED") @@ -87,7 +87,7 @@ def __init__(self): self.name = name self.steps = ["first-step"] - def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None): + def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None): yield FakeEvent("WorkflowStarted", content=None) yield FakeEvent("StepCompleted", content="hello ") workflow_run_response.content = "world" @@ -115,7 +115,7 @@ def __init__(self): self.steps = ["agent-step"] self.agent = WrappedAgent() - async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None): + async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None): return await self.agent.arun(execution_input.input) return FakeWorkflow @@ -128,7 +128,7 @@ def __init__(self): self.id = "workflow-agent-123" self.steps = ["agent-step"] - def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs): + def _execute_workflow_agent(self, user_input, session, execution_input, _run_context, stream=False, **kwargs): if stream: def _stream(): @@ -143,7 +143,7 @@ def _stream(): return _stream() return FakeRunOutput(f"{user_input}-sync") - async def _aexecute_workflow_agent(self, user_input, run_context, execution_input, stream=False, **kwargs): + async def _aexecute_workflow_agent(self, user_input, _run_context, execution_input, stream=False, **kwargs): if stream: async def _astream(): diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py index 3a516568..cb95e336 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py @@ -68,8 +68,8 @@ def _normalize_write(data: str, *, sanitize: bool = False) -> dict[str, Any]: async def _empty_stream(): - return - yield {} # type: ignore[unreachable] + for _ in (): + yield {} def _normalize_for_match(value: Any) -> Any: diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py index b794b18b..81de2ea4 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py @@ -184,13 +184,11 @@ async def fake_run_chat( *, stream, agent, - deps, - console, - code_theme, prog_name, message_history, model_settings=None, usage_limits=None, + **_, ): assert stream is True assert prog_name == "braintrust-cli" From 6e8b52776cc1c654152a64c0d8faa493765598c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Mon, 23 Mar 2026 17:58:29 +0000 Subject: [PATCH 2/8] chore: add vulture to pyproject.toml --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b7d159c8..31230cc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,3 +20,8 @@ split-on-trailing-comma = true asyncio_mode = "strict" asyncio_default_fixture_loop_scope = "function" addopts = "--durations=3 --durations-min=0.1" + +[tool.vulture] +paths = ["py/src"] +ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id"] # pytest fixtures and deprecated-but-public API parameters +min_confidence = 100 From 75e304c54c7037109fc12bd15861b0a0e7df57fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Mon, 23 Mar 2026 20:53:19 +0000 Subject: [PATCH 3/8] chore: add vulture to pre-commit --- .pre-commit-config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e9df688d..7ea78815 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,3 +32,8 @@ repos: args: - "-L" - "rouge,coo,couldn,unsecure,ontext,afterall,als" + - repo: https://github.com/jendrikseipp/vulture + rev: v2.15 + hooks: + - id: vulture + pass_filenames: false From bfa3b2c5f4e63b610a083a7f10ef54308ec03f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Tue, 24 Mar 2026 18:52:42 +0000 Subject: [PATCH 4/8] chore: remove probably unused code (need human review) --- py/src/braintrust/http_headers.py | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 py/src/braintrust/http_headers.py diff --git a/py/src/braintrust/http_headers.py b/py/src/braintrust/http_headers.py deleted file mode 100644 index 138a1f03..00000000 --- a/py/src/braintrust/http_headers.py +++ /dev/null @@ -1,4 +0,0 @@ -BT_FOUND_EXISTING_HEADER = "x-bt-found-existing" -BT_CURSOR_HEADER = "x-bt-cursor" -BT_IMPERSONATE_USER = "x-bt-impersonate-user" -BT_PARENT = "x-bt-parent" From 2690c1a2a9606cb39456503e89ccd9df81898fb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Tue, 24 Mar 2026 22:31:31 +0000 Subject: [PATCH 5/8] chore: remove probably unused code (need human review) --- py/src/braintrust/cli/eval.py | 2 +- py/src/braintrust/cli/install/logs.py | 1 - py/src/braintrust/db_fields.py | 9 --- py/src/braintrust/framework.py | 27 ++----- py/src/braintrust/logger.py | 5 -- py/src/braintrust/parameters.py | 7 -- py/src/braintrust/queue.py | 2 - py/src/braintrust/wrappers/adk/__init__.py | 2 - py/src/braintrust/wrappers/anthropic.py | 1 - .../wrappers/claude_agent_sdk/_wrapper.py | 4 - py/src/braintrust/wrappers/langchain.py | 2 - py/src/braintrust/wrappers/pydantic_ai.py | 74 ------------------- 12 files changed, 9 insertions(+), 127 deletions(-) diff --git a/py/src/braintrust/cli/eval.py b/py/src/braintrust/cli/eval.py index f0e5dc89..595c2c0a 100644 --- a/py/src/braintrust/cli/eval.py +++ b/py/src/braintrust/cli/eval.py @@ -246,7 +246,7 @@ def check_match(path_input, include_patterns, exclude_patterns): def collect_files(input_path): if os.path.isdir(input_path): - for root, dirs, files in os.walk(input_path): + for root, _, files in os.walk(input_path): for file in files: fname = os.path.join(root, file) if check_match(fname, INCLUDE, EXCLUDE): diff --git a/py/src/braintrust/cli/install/logs.py b/py/src/braintrust/cli/install/logs.py index 2b840aec..4d46ad87 100644 --- a/py/src/braintrust/cli/install/logs.py +++ b/py/src/braintrust/cli/install/logs.py @@ -88,7 +88,6 @@ def get_events(stream): with ThreadPoolExecutor(8) as executor: events = executor.map(get_events, all_streams) - last_ts = None for stream, log in zip(all_streams, events): print(f"---- LOG STREAM: {stream['logStreamName']}") for event in log["events"]: diff --git a/py/src/braintrust/db_fields.py b/py/src/braintrust/db_fields.py index a89b9710..6fd95df4 100644 --- a/py/src/braintrust/db_fields.py +++ b/py/src/braintrust/db_fields.py @@ -1,21 +1,12 @@ TRANSACTION_ID_FIELD = "_xact_id" OBJECT_DELETE_FIELD = "_object_delete" -CREATED_FIELD = "created" -ID_FIELD = "id" IS_MERGE_FIELD = "_is_merge" -MERGE_PATHS_FIELD = "_merge_paths" -ARRAY_DELETE_FIELD = "_array_delete" AUDIT_SOURCE_FIELD = "_audit_source" AUDIT_METADATA_FIELD = "_audit_metadata" VALID_SOURCES = ["app", "api", "external"] -PARENT_ID_FIELD = "_parent_id" - -ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control" -SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring" - # Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to. OBJECT_ID_KEYS = ( "experiment_id", diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 05a92ff6..f1acf9b8 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -62,15 +62,15 @@ # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal class bcolors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKCYAN = "\033[96m" - OKGREEN = "\033[92m" +# HEADER = "\033[95m" +# OKBLUE = "\033[94m" +# OKCYAN = "\033[96m" +# OKGREEN = "\033[92m" WARNING = "\033[93m" FAIL = "\033[91m" ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" +# BOLD = "\033[1m" +# UNDERLINE = "\033[4m" @dataclasses.dataclass @@ -228,17 +228,6 @@ def parameters(self) -> ValidatedParameters | None: """ -class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]): - """ - Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata. - """ - - input: Input - output: Output - expected: Output | None = None - metadata: Metadata | None = None - - OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]] @@ -850,7 +839,7 @@ async def EvalAsync( :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`. :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation. :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function - that takes an `EvalScorerArgs` object and returns a `Score` object. + that takes `(input, output, expected)` arguments and returns a `Score` object. :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically. :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results. @@ -977,7 +966,7 @@ def Eval( :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`. :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation. :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function - that takes an `EvalScorerArgs` object and returns a `Score` object. + that takes `(input, output, expected)` arguments and returns a `Score` object. :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically. :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results. diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 54e45d84..84260023 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -1060,9 +1060,6 @@ def __init__(self, api_conn: LazyValue[HTTPConnection]): self.logger = logging.getLogger("braintrust") self.queue: "LogQueue[LazyValue[Dict[str, Any]]]" = LogQueue(maxsize=self.queue_maxsize) - # Counter for tracking overflow uploads (useful for testing) - self._overflow_upload_count = 0 - if not disable_atexit_flush: atexit.register(self._finalize) @@ -1374,8 +1371,6 @@ def _submit_logs_request(self, items: Sequence[LogItemWithMeta], max_request_siz except Exception as e: error = e if error is None and resp is not None and resp.ok: - if overflow_rows: - self._overflow_upload_count += 1 return if error is None and resp is not None: resp_errmsg = f"{resp.status_code}: {resp.text}" diff --git a/py/src/braintrust/parameters.py b/py/src/braintrust/parameters.py index 595ba3ce..ac9d4a86 100644 --- a/py/src/braintrust/parameters.py +++ b/py/src/braintrust/parameters.py @@ -63,13 +63,6 @@ def from_function_row(cls, row: dict[str, Any]) -> "RemoteEvalParameters": data=function_data.get("data") or {}, ) - def validate(self, data: Any) -> bool: - try: - validate_json_schema(data, self.schema) - return True - except ValueError: - return False - def _pydantic_to_json_schema(model: Any) -> dict[str, Any]: """Convert a pydantic model to JSON schema.""" diff --git a/py/src/braintrust/queue.py b/py/src/braintrust/queue.py index ff6fc6cf..cfd5e834 100644 --- a/py/src/braintrust/queue.py +++ b/py/src/braintrust/queue.py @@ -32,7 +32,6 @@ def __init__(self, maxsize: int = 0): self._mutex = threading.Lock() self._queue: deque[T] = deque(maxlen=maxsize) self._has_items_event = threading.Event() - self._total_dropped = 0 self._enforce_size_limit = False def enforce_queue_size_limit(self, enforce: bool) -> None: @@ -68,7 +67,6 @@ def put(self, item: T) -> list[T]: while len(self._queue) >= self.maxsize: dropped_item = self._queue.popleft() dropped.append(dropped_item) - self._total_dropped += 1 self._queue.append(item) # Signal that items are available if queue was not empty before or item was added diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk/__init__.py index 6c6b8a14..3f9036ab 100644 --- a/py/src/braintrust/wrappers/adk/__init__.py +++ b/py/src/braintrust/wrappers/adk/__init__.py @@ -412,8 +412,6 @@ def _determine_llm_call_type(llm_request: Any, model_response: Any = None) -> st request_dict = cast(dict[str, Any], bt_safe_deep_copy(llm_request)) # Check if there are tools in the config - has_tools = bool(request_dict.get("config", {}).get("tools")) - # Check the conversation history for function responses contents = request_dict.get("contents", []) has_function_response = False diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py index 8357fc1e..d9169f29 100644 --- a/py/src/braintrust/wrappers/anthropic.py +++ b/py/src/braintrust/wrappers/anthropic.py @@ -239,7 +239,6 @@ def __init__(self, msg_stream, span, request_start_time: float): super().__init__(msg_stream) self.__msg_stream = msg_stream self.__span = span - self.__metrics = {} self.__snapshot = None self.__request_start_time = request_start_time self.__time_to_first_token: float | None = None diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index e019241d..71460302 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -79,10 +79,6 @@ def release(self) -> None: _NOOP_ACTIVE_TOOL_SPAN = _NoopActiveToolSpan() -def _log_tracing_warning(exc: Exception) -> None: - log.warning("Error in tracing code", exc_info=exc) - - def _parse_tool_name(tool_name: Any) -> ParsedToolName: raw_name = str(tool_name) if tool_name is not None else DEFAULT_TOOL_NAME diff --git a/py/src/braintrust/wrappers/langchain.py b/py/src/braintrust/wrappers/langchain.py index 6beeb578..28924196 100644 --- a/py/src/braintrust/wrappers/langchain.py +++ b/py/src/braintrust/wrappers/langchain.py @@ -11,14 +11,12 @@ try: from langchain.callbacks.base import BaseCallbackHandler from langchain.schema import Document - from langchain.schema.agent import AgentAction from langchain.schema.messages import BaseMessage from langchain.schema.output import LLMResult except ImportError: _logger.warning("Failed to import langchain, using stubs") BaseCallbackHandler = object Document = object - AgentAction = object BaseMessage = object LLMResult = object diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py index e3442b85..6dd7ca45 100644 --- a/py/src/braintrust/wrappers/pydantic_ai.py +++ b/py/src/braintrust/wrappers/pydantic_ai.py @@ -327,80 +327,6 @@ def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): return wrapper -def wrap_model_request(original_func: Any) -> Any: - async def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = await original_func(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def wrap_model_request_sync(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request_sync", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = original_func(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def wrap_model_request_stream(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapper( - original_func(*args, **kwargs), - "model_request_stream", - input_data, - metadata, - ) - - return wrapper - - -def wrap_model_request_stream_sync(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapperSync( - original_func(*args, **kwargs), - "model_request_stream_sync", - input_data, - metadata, - ) - - return wrapper - - def wrap_model_classes(): """Wrap Model classes to capture internal model requests made by agents.""" try: From 1646673a623b4bfc2628e134beb88af6ace19ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Tue, 24 Mar 2026 23:22:06 +0000 Subject: [PATCH 6/8] chore: remove probably unused code (need human review) --- py/src/braintrust/framework.py | 10 ++++++---- py/src/braintrust/otel/test_distributed_tracing.py | 1 - py/src/braintrust/otel/test_otel_bt_integration.py | 1 - py/src/braintrust/test_http.py | 5 ----- py/src/braintrust/wrappers/anthropic.py | 4 ---- py/src/braintrust/wrappers/google_genai/__init__.py | 12 ------------ 6 files changed, 6 insertions(+), 27 deletions(-) diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index f1acf9b8..e223bb97 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -62,13 +62,15 @@ # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal class bcolors: -# HEADER = "\033[95m" -# OKBLUE = "\033[94m" -# OKCYAN = "\033[96m" -# OKGREEN = "\033[92m" + # HEADER = "\033[95m" + # OKBLUE = "\033[94m" + # OKCYAN = "\033[96m" + # OKGREEN = "\033[92m" WARNING = "\033[93m" FAIL = "\033[91m" ENDC = "\033[0m" + + # BOLD = "\033[1m" # UNDERLINE = "\033[4m" diff --git a/py/src/braintrust/otel/test_distributed_tracing.py b/py/src/braintrust/otel/test_distributed_tracing.py index a2fab2a2..1d9b8c86 100644 --- a/py/src/braintrust/otel/test_distributed_tracing.py +++ b/py/src/braintrust/otel/test_distributed_tracing.py @@ -123,7 +123,6 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture): assert len(otel_spans) == 1, "Should have 1 OTEL span from Service B" # Get the spans - service_a_exported = bt_spans[0] service_b_exported = otel_spans[0] # Convert OTEL IDs to hex for comparison diff --git a/py/src/braintrust/otel/test_otel_bt_integration.py b/py/src/braintrust/otel/test_otel_bt_integration.py index 579082d9..6792982e 100644 --- a/py/src/braintrust/otel/test_otel_bt_integration.py +++ b/py/src/braintrust/otel/test_otel_bt_integration.py @@ -197,7 +197,6 @@ def test_mixed_otel_bt_tracing_with_otel_first(otel_fixture): s1_trace_id = format(s1.context.trace_id, "032x") s1_span_id = format(s1.context.span_id, "016x") s3_trace_id = format(s3.context.trace_id, "032x") - s3_span_id = format(s3.context.span_id, "016x") assert s1_trace_id == s2["root_span_id"] assert s1_trace_id == s3_trace_id diff --git a/py/src/braintrust/test_http.py b/py/src/braintrust/test_http.py index b9ede8d8..ba5ac282 100644 --- a/py/src/braintrust/test_http.py +++ b/py/src/braintrust/test_http.py @@ -404,17 +404,12 @@ def do_GET(self): session.mount("http://", adapter) errors = [] - success_count = 0 lock = threading.Lock() def make_request(i): - nonlocal success_count try: time.sleep(i * 0.005) # Stagger requests resp = session.get(f"{url}/test{i}") - if resp.status_code == 200: - with lock: - success_count += 1 return resp.status_code except Exception as e: with lock: diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py index d9169f29..03049697 100644 --- a/py/src/braintrust/wrappers/anthropic.py +++ b/py/src/braintrust/wrappers/anthropic.py @@ -357,10 +357,6 @@ def wrap_anthropic(client): return client -def wrap_anthropic_client(client): - return wrap_anthropic(client) - - def _apply_anthropic_wrapper(client): """Apply tracing wrapper to an Anthropic client instance in-place.""" wrapped = wrap_anthropic(client) diff --git a/py/src/braintrust/wrappers/google_genai/__init__.py b/py/src/braintrust/wrappers/google_genai/__init__.py index 61df30ab..87a11cae 100644 --- a/py/src/braintrust/wrappers/google_genai/__init__.py +++ b/py/src/braintrust/wrappers/google_genai/__init__.py @@ -417,15 +417,3 @@ def _aggregate_generate_content_chunks( def clean(obj: dict[str, Any]) -> dict[str, Any]: return {k: v for k, v in obj.items() if v is not None} - - -def get_path(obj: dict[str, Any], path: str, default: Any = None) -> Any | None: - keys = path.split(".") - current = obj - - for key in keys: - if not (isinstance(current, dict) and key in current): - return default - current = current[key] - - return current From 8534caedf515efbdfb1951765762f8a26ac81f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 00:27:41 +0000 Subject: [PATCH 7/8] chore: remove probably unused code (need human review) --- py/src/braintrust/test_context.py | 8 -------- py/src/braintrust/test_logger.py | 3 --- py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py | 6 ------ .../wrappers/test_pydantic_ai_integration.py | 10 ++++------ 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py index 313756cf..9c70a987 100644 --- a/py/src/braintrust/test_context.py +++ b/py/src/braintrust/test_context.py @@ -896,8 +896,6 @@ async def generator_with_finally() -> AsyncGenerator[int, None]: yield 1 yield 2 finally: - # What context do we have during cleanup? - cleanup_span = current_span() gen_span.end() # Consumer @@ -1152,14 +1150,11 @@ def test_nested_spans_same_thread(test_logger, with_memory_logger): # Child span with start_span(name="child") as child_span: - child_id = child_span.id - # Verify child is now current assert current_span().id == child_span.id # Grandchild span with start_span(name="grandchild") as grandchild_span: - grandchild_id = grandchild_span.id assert current_span().id == grandchild_span.id # After grandchild closes, child should be current @@ -1227,13 +1222,10 @@ def test_context_with_exception_propagation(test_logger, with_memory_logger): """ Test that context is properly maintained during exception propagation. """ - fail_span_id = None def failing_function(): - nonlocal fail_span_id # Use context manager for proper span lifecycle with start_span(name="failing_span") as fail_span: - fail_span_id = fail_span.id # During this context, fail_span should be current assert current_span().id == fail_span.id raise ValueError("Expected error") diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 7662ad77..39513c1c 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -1437,9 +1437,6 @@ def test_span_set_current(with_memory_logger): """Test that span.set_current() makes the span accessible via current_span().""" init_test_logger(__name__) - # Store initial current span - initial_current = braintrust.current_span() - # Start a span that can be set as current (default behavior) span1 = logger.start_span(name="test-span-1") diff --git a/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py b/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py index 5894c5b6..25c9cc95 100644 --- a/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py +++ b/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py @@ -145,9 +145,6 @@ async def run_async(self, *, args, tool_context): # Verify error was logged to span assert mock_span.log.called - # Check if error was logged - log_calls = [call for call in mock_span.log.call_args_list] - # Should have logged the error @pytest.mark.asyncio @@ -316,9 +313,6 @@ async def test_real_context_loss_with_braintrust_spans(): # Initialize a test logger logger = init_logger(project="test-context-loss") - # Track if we hit the context error - context_error_occurred = False - async def problematic_generator(): """Generator that creates a span and yields, simulating the Flow behavior.""" from braintrust import start_span diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py index 81de2ea4..8ed8b2e0 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py @@ -325,15 +325,13 @@ async def run_multiple_streams(): # First stream async with agent1.run_stream("Count from 1 to 3.") as result1: - full_text1 = "" - async for text in result1.stream_text(delta=True): - full_text1 += text + async for _ in result1.stream_text(delta=True): + pass # Second stream async with agent2.run_stream("Count from 1 to 3.") as result2: - full_text2 = "" - async for text in result2.stream_text(delta=True): - full_text2 += text + async for _ in result2.stream_text(delta=True): + pass return start From d8d052e17853b263613421badc9d2c663c94a469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 18:42:11 +0000 Subject: [PATCH 8/8] chore: wrongly removed code --- py/src/braintrust/db_fields.py | 9 +++++++++ py/src/braintrust/framework.py | 22 ++++++++++++++++++---- pyproject.toml | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/py/src/braintrust/db_fields.py b/py/src/braintrust/db_fields.py index 6fd95df4..a89b9710 100644 --- a/py/src/braintrust/db_fields.py +++ b/py/src/braintrust/db_fields.py @@ -1,12 +1,21 @@ TRANSACTION_ID_FIELD = "_xact_id" OBJECT_DELETE_FIELD = "_object_delete" +CREATED_FIELD = "created" +ID_FIELD = "id" IS_MERGE_FIELD = "_is_merge" +MERGE_PATHS_FIELD = "_merge_paths" +ARRAY_DELETE_FIELD = "_array_delete" AUDIT_SOURCE_FIELD = "_audit_source" AUDIT_METADATA_FIELD = "_audit_metadata" VALID_SOURCES = ["app", "api", "external"] +PARENT_ID_FIELD = "_parent_id" + +ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control" +SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring" + # Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to. OBJECT_ID_KEYS = ( "experiment_id", diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index e223bb97..040b869a 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -69,10 +69,8 @@ class bcolors: WARNING = "\033[93m" FAIL = "\033[91m" ENDC = "\033[0m" - - -# BOLD = "\033[1m" -# UNDERLINE = "\033[4m" + # BOLD = "\033[1m" + # UNDERLINE = "\033[4m" @dataclasses.dataclass @@ -230,6 +228,17 @@ def parameters(self) -> ValidatedParameters | None: """ +class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]): + """ + Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata. + """ + + input: Input + output: Output + expected: Output | None = None + metadata: Metadata | None = None + + OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]] @@ -1686,6 +1695,11 @@ async def with_max_concurrency(coro): for trial_index in range(evaluator.trial_count): tasks.append(asyncio.create_task(with_max_concurrency(run_evaluator_task(datum, trial_index)))) + if not tasks: + eprint( + f"{bcolors.WARNING}Warning: no data rows found for evaluator '{evaluator.eval_name}'. The experiment will be empty.{bcolors.ENDC}" + ) + results = [] for task in std_tqdm(tasks, desc=f"{evaluator.eval_name} (tasks)", position=position, disable=position is None): results.append(await task) diff --git a/pyproject.toml b/pyproject.toml index 31230cc9..212d6046 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,5 +23,5 @@ addopts = "--durations=3 --durations-min=0.1" [tool.vulture] paths = ["py/src"] -ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id"] # pytest fixtures and deprecated-but-public API parameters +ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id", "EvalScorerArgs", "CREATED_FIELD", "ID_FIELD", "MERGE_PATHS_FIELD", "ARRAY_DELETE_FIELD", "PARENT_ID_FIELD", "ASYNC_SCORING_CONTROL_FIELD", "SKIP_ASYNC_SCORING_FIELD"] # pytest fixtures, deprecated-but-public API, and protocol field constants min_confidence = 100