From fcae7d7b7797a74f5f72226b63ef076e3cf1617c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Fri, 20 Mar 2026 23:39:31 +0000
Subject: [PATCH 1/8] chore: remove dead code

---
 py/src/braintrust/framework.py                     |  4 ++--
 py/src/braintrust/logger.py                        |  5 ++---
 .../braintrust/wrappers/agno/_test_agno_helpers.py | 14 +++++++-------
 .../wrappers/claude_agent_sdk/_test_transport.py   |  4 ++--
 .../wrappers/test_pydantic_ai_integration.py       |  4 +---
 5 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index d80fb1f9..05a92ff6 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -203,7 +203,7 @@ def tags(self) -> Sequence[str]:
         """
 
     @abc.abstractmethod
-    def report_progress(self, progress: TaskProgressEvent) -> None:
+    def report_progress(self, _progress: TaskProgressEvent) -> None:
         """
         Report progress that will show up in the playground.
         """
@@ -459,7 +459,7 @@ class EvalResultWithSummary(SerializableDataClass, Generic[Input, Output]):
     summary: ExperimentSummary
     results: list[EvalResult[Input, Output]]
 
-    def _repr_pretty_(self, p, cycle):
+    def _repr_pretty_(self, p, _cycle):
         p.text(f'EvalResultWithSummary(summary="...", results=[...])')
 
 
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index a9ba479b..54e45d84 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -1437,7 +1437,7 @@ def _register_dropped_item_count(self, num_items):
                 self._queue_drop_logging_state["last_logged_timestamp"] = time_now
 
     @staticmethod
-    def _write_payload_to_dir(payload_dir, payload, debug_logging_adjective=None):
+    def _write_payload_to_dir(payload_dir, payload):
         payload_file = os.path.join(payload_dir, f"payload_{time.time()}_{str(uuid.uuid4())[:8]}.json")
         try:
             os.makedirs(payload_dir, exist_ok=True)
@@ -2831,7 +2831,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
 # Note that this only checks properties that are expected of a complete event.
 # _validate_and_sanitize_experiment_log_partial_args should still be invoked
 # (after handling special fields like 'id').
-def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any], has_dataset: bool) -> Mapping[str, Any]:
+def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any]) -> Mapping[str, Any]:
     input = event.get("input")
     inputs = event.get("inputs")
     if (input is not None and inputs is not None) or (input is None and inputs is None):
@@ -3861,7 +3861,6 @@ def log(
                 metrics=metrics,
                 id=id,
             ),
-            self.dataset is not None,
         )
         span = self._start_span_impl(start_time=self.last_start_time, lookup_span_parent=False, **event)
         self.last_start_time = span.end()
diff --git a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
index fcb926e1..00fa2ce0 100644
--- a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
+++ b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
@@ -56,10 +56,10 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
+        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
             return FakeWorkflowRunResponse(input=execution_input.input, content="workflow-async")
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("WorkflowStarted", content=None)
             yield FakeEvent("StepStarted", content=None)
             yield FakeEvent("StepCompleted", content="hello ")
@@ -74,7 +74,7 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("StepCompleted", content="hello")
             yield FakeEvent("WorkflowCompleted", content="hello", metrics=FakeMetrics(), status="COMPLETED")
 
@@ -87,7 +87,7 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("WorkflowStarted", content=None)
             yield FakeEvent("StepCompleted", content="hello ")
             workflow_run_response.content = "world"
@@ -115,7 +115,7 @@ def __init__(self):
             self.steps = ["agent-step"]
             self.agent = WrappedAgent()
 
-        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
+        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
             return await self.agent.arun(execution_input.input)
 
     return FakeWorkflow
@@ -128,7 +128,7 @@ def __init__(self):
             self.id = "workflow-agent-123"
             self.steps = ["agent-step"]
 
-        def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs):
+        def _execute_workflow_agent(self, user_input, session, execution_input, _run_context, stream=False, **kwargs):
             if stream:
 
                 def _stream():
@@ -143,7 +143,7 @@ def _stream():
                 return _stream()
             return FakeRunOutput(f"{user_input}-sync")
 
-        async def _aexecute_workflow_agent(self, user_input, run_context, execution_input, stream=False, **kwargs):
+        async def _aexecute_workflow_agent(self, user_input, _run_context, execution_input, stream=False, **kwargs):
             if stream:
 
                 async def _astream():
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
index 3a516568..cb95e336 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
@@ -68,8 +68,8 @@ def _normalize_write(data: str, *, sanitize: bool = False) -> dict[str, Any]:
 
 
 async def _empty_stream():
-    return
-    yield {}  # type: ignore[unreachable]
+    for _ in ():
+        yield {}
 
 
 def _normalize_for_match(value: Any) -> Any:
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
index b794b18b..81de2ea4 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
+++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
@@ -184,13 +184,11 @@ async def fake_run_chat(
         *,
         stream,
         agent,
-        deps,
-        console,
-        code_theme,
         prog_name,
         message_history,
         model_settings=None,
         usage_limits=None,
+        **_,
     ):
         assert stream is True
         assert prog_name == "braintrust-cli"

From 6e8b52776cc1c654152a64c0d8faa493765598c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Mon, 23 Mar 2026 17:58:29 +0000
Subject: [PATCH 2/8] chore: add vulture to pyproject.toml

---
 pyproject.toml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index b7d159c8..31230cc9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,3 +20,8 @@ split-on-trailing-comma = true
 asyncio_mode = "strict"
 asyncio_default_fixture_loop_scope = "function"
 addopts = "--durations=3 --durations-min=0.1"
+
+[tool.vulture]
+paths = ["py/src"]
+ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id"] # pytest fixtures and deprecated-but-public API parameters
+min_confidence = 100

From 75e304c54c7037109fc12bd15861b0a0e7df57fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Mon, 23 Mar 2026 20:53:19 +0000
Subject: [PATCH 3/8] chore: add vulture to pre-commit

---
 .pre-commit-config.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e9df688d..7ea78815 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,3 +32,8 @@ repos:
         args:
           - "-L"
           - "rouge,coo,couldn,unsecure,ontext,afterall,als"
+  - repo: https://github.com/jendrikseipp/vulture
+    rev: v2.15
+    hooks:
+      - id: vulture
+        pass_filenames: false

From bfa3b2c5f4e63b610a083a7f10ef54308ec03f1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Tue, 24 Mar 2026 18:52:42 +0000
Subject: [PATCH 4/8] chore: remove probably unused code (needs human review)

---
 py/src/braintrust/http_headers.py | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 py/src/braintrust/http_headers.py

diff --git a/py/src/braintrust/http_headers.py b/py/src/braintrust/http_headers.py
deleted file mode 100644
index 138a1f03..00000000
--- a/py/src/braintrust/http_headers.py
+++ /dev/null
@@ -1,4 +0,0 @@
-BT_FOUND_EXISTING_HEADER = "x-bt-found-existing"
-BT_CURSOR_HEADER = "x-bt-cursor"
-BT_IMPERSONATE_USER = "x-bt-impersonate-user"
-BT_PARENT = "x-bt-parent"

From 2690c1a2a9606cb39456503e89ccd9df81898fb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Tue, 24 Mar 2026 22:31:31 +0000
Subject: [PATCH 5/8] chore: remove probably unused code (needs human review)

---
 py/src/braintrust/cli/eval.py                 |  2 +-
 py/src/braintrust/cli/install/logs.py         |  1 -
 py/src/braintrust/db_fields.py                |  9 ---
 py/src/braintrust/framework.py                | 27 ++-----
 py/src/braintrust/logger.py                   |  5 --
 py/src/braintrust/parameters.py               |  7 --
 py/src/braintrust/queue.py                    |  2 -
 py/src/braintrust/wrappers/adk/__init__.py    |  2 -
 py/src/braintrust/wrappers/anthropic.py       |  1 -
 .../wrappers/claude_agent_sdk/_wrapper.py     |  4 -
 py/src/braintrust/wrappers/langchain.py       |  2 -
 py/src/braintrust/wrappers/pydantic_ai.py     | 74 -------------------
 12 files changed, 9 insertions(+), 127 deletions(-)

diff --git a/py/src/braintrust/cli/eval.py b/py/src/braintrust/cli/eval.py
index f0e5dc89..595c2c0a 100644
--- a/py/src/braintrust/cli/eval.py
+++ b/py/src/braintrust/cli/eval.py
@@ -246,7 +246,7 @@ def check_match(path_input, include_patterns, exclude_patterns):
 
 def collect_files(input_path):
     if os.path.isdir(input_path):
-        for root, dirs, files in os.walk(input_path):
+        for root, _, files in os.walk(input_path):
             for file in files:
                 fname = os.path.join(root, file)
                 if check_match(fname, INCLUDE, EXCLUDE):
diff --git a/py/src/braintrust/cli/install/logs.py b/py/src/braintrust/cli/install/logs.py
index 2b840aec..4d46ad87 100644
--- a/py/src/braintrust/cli/install/logs.py
+++ b/py/src/braintrust/cli/install/logs.py
@@ -88,7 +88,6 @@ def get_events(stream):
         with ThreadPoolExecutor(8) as executor:
             events = executor.map(get_events, all_streams)
 
-        last_ts = None
         for stream, log in zip(all_streams, events):
             print(f"---- LOG STREAM: {stream['logStreamName']}")
             for event in log["events"]:
diff --git a/py/src/braintrust/db_fields.py b/py/src/braintrust/db_fields.py
index a89b9710..6fd95df4 100644
--- a/py/src/braintrust/db_fields.py
+++ b/py/src/braintrust/db_fields.py
@@ -1,21 +1,12 @@
 TRANSACTION_ID_FIELD = "_xact_id"
 OBJECT_DELETE_FIELD = "_object_delete"
-CREATED_FIELD = "created"
-ID_FIELD = "id"
 
 IS_MERGE_FIELD = "_is_merge"
-MERGE_PATHS_FIELD = "_merge_paths"
-ARRAY_DELETE_FIELD = "_array_delete"
 
 AUDIT_SOURCE_FIELD = "_audit_source"
 AUDIT_METADATA_FIELD = "_audit_metadata"
 VALID_SOURCES = ["app", "api", "external"]
 
-PARENT_ID_FIELD = "_parent_id"
-
-ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control"
-SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring"
-
 # Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to.
 OBJECT_ID_KEYS = (
     "experiment_id",
diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index 05a92ff6..f1acf9b8 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -62,15 +62,15 @@
 
 # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
 class bcolors:
-    HEADER = "\033[95m"
-    OKBLUE = "\033[94m"
-    OKCYAN = "\033[96m"
-    OKGREEN = "\033[92m"
+#     HEADER = "\033[95m"
+#     OKBLUE = "\033[94m"
+#     OKCYAN = "\033[96m"
+#     OKGREEN = "\033[92m"
     WARNING = "\033[93m"
     FAIL = "\033[91m"
     ENDC = "\033[0m"
-    BOLD = "\033[1m"
-    UNDERLINE = "\033[4m"
+#     BOLD = "\033[1m"
+#     UNDERLINE = "\033[4m"
 
 
 @dataclasses.dataclass
@@ -228,17 +228,6 @@ def parameters(self) -> ValidatedParameters | None:
         """
 
 
-class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]):
-    """
-    Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata.
-    """
-
-    input: Input
-    output: Output
-    expected: Output | None = None
-    metadata: Metadata | None = None
-
-
 OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]]
 
 
@@ -850,7 +839,7 @@ async def EvalAsync(
     :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
     :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
     :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
-    that takes an `EvalScorerArgs` object and returns a `Score` object.
+    that takes `(input, output, expected)` arguments and returns a `Score` object.
     :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
     :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
     have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.
@@ -977,7 +966,7 @@ def Eval(
     :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
     :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
     :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
-    that takes an `EvalScorerArgs` object and returns a `Score` object.
+    that takes `(input, output, expected)` arguments and returns a `Score` object.
     :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
     :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
     have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 54e45d84..84260023 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -1060,9 +1060,6 @@ def __init__(self, api_conn: LazyValue[HTTPConnection]):
         self.logger = logging.getLogger("braintrust")
         self.queue: "LogQueue[LazyValue[Dict[str, Any]]]" = LogQueue(maxsize=self.queue_maxsize)
 
-        # Counter for tracking overflow uploads (useful for testing)
-        self._overflow_upload_count = 0
-
         if not disable_atexit_flush:
             atexit.register(self._finalize)
 
@@ -1374,8 +1371,6 @@ def _submit_logs_request(self, items: Sequence[LogItemWithMeta], max_request_siz
             except Exception as e:
                 error = e
             if error is None and resp is not None and resp.ok:
-                if overflow_rows:
-                    self._overflow_upload_count += 1
                 return
             if error is None and resp is not None:
                 resp_errmsg = f"{resp.status_code}: {resp.text}"
diff --git a/py/src/braintrust/parameters.py b/py/src/braintrust/parameters.py
index 595ba3ce..ac9d4a86 100644
--- a/py/src/braintrust/parameters.py
+++ b/py/src/braintrust/parameters.py
@@ -63,13 +63,6 @@ def from_function_row(cls, row: dict[str, Any]) -> "RemoteEvalParameters":
             data=function_data.get("data") or {},
         )
 
-    def validate(self, data: Any) -> bool:
-        try:
-            validate_json_schema(data, self.schema)
-            return True
-        except ValueError:
-            return False
-
 
 def _pydantic_to_json_schema(model: Any) -> dict[str, Any]:
     """Convert a pydantic model to JSON schema."""
diff --git a/py/src/braintrust/queue.py b/py/src/braintrust/queue.py
index ff6fc6cf..cfd5e834 100644
--- a/py/src/braintrust/queue.py
+++ b/py/src/braintrust/queue.py
@@ -32,7 +32,6 @@ def __init__(self, maxsize: int = 0):
         self._mutex = threading.Lock()
         self._queue: deque[T] = deque(maxlen=maxsize)
         self._has_items_event = threading.Event()
-        self._total_dropped = 0
         self._enforce_size_limit = False
 
     def enforce_queue_size_limit(self, enforce: bool) -> None:
@@ -68,7 +67,6 @@ def put(self, item: T) -> list[T]:
                 while len(self._queue) >= self.maxsize:
                     dropped_item = self._queue.popleft()
                     dropped.append(dropped_item)
-                    self._total_dropped += 1
                 self._queue.append(item)
 
             # Signal that items are available if queue was not empty before or item was added
diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk/__init__.py
index 6c6b8a14..3f9036ab 100644
--- a/py/src/braintrust/wrappers/adk/__init__.py
+++ b/py/src/braintrust/wrappers/adk/__init__.py
@@ -412,8 +412,6 @@ def _determine_llm_call_type(llm_request: Any, model_response: Any = None) -> st
         request_dict = cast(dict[str, Any], bt_safe_deep_copy(llm_request))
 
         # Check if there are tools in the config
-        has_tools = bool(request_dict.get("config", {}).get("tools"))
-
         # Check the conversation history for function responses
         contents = request_dict.get("contents", [])
         has_function_response = False
diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py
index 8357fc1e..d9169f29 100644
--- a/py/src/braintrust/wrappers/anthropic.py
+++ b/py/src/braintrust/wrappers/anthropic.py
@@ -239,7 +239,6 @@ def __init__(self, msg_stream, span, request_start_time: float):
         super().__init__(msg_stream)
         self.__msg_stream = msg_stream
         self.__span = span
-        self.__metrics = {}
         self.__snapshot = None
         self.__request_start_time = request_start_time
         self.__time_to_first_token: float | None = None
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
index e019241d..71460302 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
@@ -79,10 +79,6 @@ def release(self) -> None:
 _NOOP_ACTIVE_TOOL_SPAN = _NoopActiveToolSpan()
 
 
-def _log_tracing_warning(exc: Exception) -> None:
-    log.warning("Error in tracing code", exc_info=exc)
-
-
 def _parse_tool_name(tool_name: Any) -> ParsedToolName:
     raw_name = str(tool_name) if tool_name is not None else DEFAULT_TOOL_NAME
 
diff --git a/py/src/braintrust/wrappers/langchain.py b/py/src/braintrust/wrappers/langchain.py
index 6beeb578..28924196 100644
--- a/py/src/braintrust/wrappers/langchain.py
+++ b/py/src/braintrust/wrappers/langchain.py
@@ -11,14 +11,12 @@
 try:
     from langchain.callbacks.base import BaseCallbackHandler
     from langchain.schema import Document
-    from langchain.schema.agent import AgentAction
     from langchain.schema.messages import BaseMessage
     from langchain.schema.output import LLMResult
 except ImportError:
     _logger.warning("Failed to import langchain, using stubs")
     BaseCallbackHandler = object
     Document = object
-    AgentAction = object
     BaseMessage = object
     LLMResult = object
 
diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py
index e3442b85..6dd7ca45 100644
--- a/py/src/braintrust/wrappers/pydantic_ai.py
+++ b/py/src/braintrust/wrappers/pydantic_ai.py
@@ -327,80 +327,6 @@ def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
     return wrapper
 
 
-def wrap_model_request(original_func: Any) -> Any:
-    async def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = await original_func(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def wrap_model_request_sync(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request_sync",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = original_func(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def wrap_model_request_stream(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapper(
-            original_func(*args, **kwargs),
-            "model_request_stream",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
-def wrap_model_request_stream_sync(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapperSync(
-            original_func(*args, **kwargs),
-            "model_request_stream_sync",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
 def wrap_model_classes():
     """Wrap Model classes to capture internal model requests made by agents."""
     try:

From 1646673a623b4bfc2628e134beb88af6ace19ab9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Tue, 24 Mar 2026 23:22:06 +0000
Subject: [PATCH 6/8] chore: remove probably unused code (needs human review)

---
 py/src/braintrust/framework.py                      | 10 ++++++----
 py/src/braintrust/otel/test_distributed_tracing.py  |  1 -
 py/src/braintrust/otel/test_otel_bt_integration.py  |  1 -
 py/src/braintrust/test_http.py                      |  5 -----
 py/src/braintrust/wrappers/anthropic.py             |  4 ----
 py/src/braintrust/wrappers/google_genai/__init__.py | 12 ------------
 6 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index f1acf9b8..e223bb97 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -62,13 +62,15 @@
 
 # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
 class bcolors:
-#     HEADER = "\033[95m"
-#     OKBLUE = "\033[94m"
-#     OKCYAN = "\033[96m"
-#     OKGREEN = "\033[92m"
+    # HEADER = "\033[95m"
+    # OKBLUE = "\033[94m"
+    # OKCYAN = "\033[96m"
+    # OKGREEN = "\033[92m"
     WARNING = "\033[93m"
     FAIL = "\033[91m"
     ENDC = "\033[0m"
+
+
 #     BOLD = "\033[1m"
 #     UNDERLINE = "\033[4m"
 
diff --git a/py/src/braintrust/otel/test_distributed_tracing.py b/py/src/braintrust/otel/test_distributed_tracing.py
index a2fab2a2..1d9b8c86 100644
--- a/py/src/braintrust/otel/test_distributed_tracing.py
+++ b/py/src/braintrust/otel/test_distributed_tracing.py
@@ -123,7 +123,6 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture):
     assert len(otel_spans) == 1, "Should have 1 OTEL span from Service B"
 
     # Get the spans
-    service_a_exported = bt_spans[0]
     service_b_exported = otel_spans[0]
 
     # Convert OTEL IDs to hex for comparison
diff --git a/py/src/braintrust/otel/test_otel_bt_integration.py b/py/src/braintrust/otel/test_otel_bt_integration.py
index 579082d9..6792982e 100644
--- a/py/src/braintrust/otel/test_otel_bt_integration.py
+++ b/py/src/braintrust/otel/test_otel_bt_integration.py
@@ -197,7 +197,6 @@ def test_mixed_otel_bt_tracing_with_otel_first(otel_fixture):
     s1_trace_id = format(s1.context.trace_id, "032x")
     s1_span_id = format(s1.context.span_id, "016x")
     s3_trace_id = format(s3.context.trace_id, "032x")
-    s3_span_id = format(s3.context.span_id, "016x")
 
     assert s1_trace_id == s2["root_span_id"]
     assert s1_trace_id == s3_trace_id
diff --git a/py/src/braintrust/test_http.py b/py/src/braintrust/test_http.py
index b9ede8d8..ba5ac282 100644
--- a/py/src/braintrust/test_http.py
+++ b/py/src/braintrust/test_http.py
@@ -404,17 +404,12 @@ def do_GET(self):
             session.mount("http://", adapter)
 
             errors = []
-            success_count = 0
             lock = threading.Lock()
 
             def make_request(i):
-                nonlocal success_count
                 try:
                     time.sleep(i * 0.005)  # Stagger requests
                     resp = session.get(f"{url}/test{i}")
-                    if resp.status_code == 200:
-                        with lock:
-                            success_count += 1
                     return resp.status_code
                 except Exception as e:
                     with lock:
diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py
index d9169f29..03049697 100644
--- a/py/src/braintrust/wrappers/anthropic.py
+++ b/py/src/braintrust/wrappers/anthropic.py
@@ -357,10 +357,6 @@ def wrap_anthropic(client):
         return client
 
 
-def wrap_anthropic_client(client):
-    return wrap_anthropic(client)
-
-
 def _apply_anthropic_wrapper(client):
     """Apply tracing wrapper to an Anthropic client instance in-place."""
     wrapped = wrap_anthropic(client)
diff --git a/py/src/braintrust/wrappers/google_genai/__init__.py b/py/src/braintrust/wrappers/google_genai/__init__.py
index 61df30ab..87a11cae 100644
--- a/py/src/braintrust/wrappers/google_genai/__init__.py
+++ b/py/src/braintrust/wrappers/google_genai/__init__.py
@@ -417,15 +417,3 @@ def _aggregate_generate_content_chunks(
 
 def clean(obj: dict[str, Any]) -> dict[str, Any]:
     return {k: v for k, v in obj.items() if v is not None}
-
-
-def get_path(obj: dict[str, Any], path: str, default: Any = None) -> Any | None:
-    keys = path.split(".")
-    current = obj
-
-    for key in keys:
-        if not (isinstance(current, dict) and key in current):
-            return default
-        current = current[key]
-
-    return current

From 8534caedf515efbdfb1951765762f8a26ac81f6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 00:27:41 +0000
Subject: [PATCH 7/8] chore: remove probably unused code (needs human review)

---
 py/src/braintrust/test_context.py                      |  8 --------
 py/src/braintrust/test_logger.py                       |  3 ---
 py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py    |  6 ------
 .../wrappers/test_pydantic_ai_integration.py           | 10 ++++------
 4 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
index 313756cf..9c70a987 100644
--- a/py/src/braintrust/test_context.py
+++ b/py/src/braintrust/test_context.py
@@ -896,8 +896,6 @@ async def generator_with_finally() -> AsyncGenerator[int, None]:
             yield 1
             yield 2
         finally:
-            # What context do we have during cleanup?
-            cleanup_span = current_span()
             gen_span.end()
 
     # Consumer
@@ -1152,14 +1150,11 @@ def test_nested_spans_same_thread(test_logger, with_memory_logger):
 
         # Child span
         with start_span(name="child") as child_span:
-            child_id = child_span.id
-
             # Verify child is now current
             assert current_span().id == child_span.id
 
             # Grandchild span
             with start_span(name="grandchild") as grandchild_span:
-                grandchild_id = grandchild_span.id
                 assert current_span().id == grandchild_span.id
 
             # After grandchild closes, child should be current
@@ -1227,13 +1222,10 @@ def test_context_with_exception_propagation(test_logger, with_memory_logger):
     """
     Test that context is properly maintained during exception propagation.
     """
-    fail_span_id = None
 
     def failing_function():
-        nonlocal fail_span_id
         # Use context manager for proper span lifecycle
         with start_span(name="failing_span") as fail_span:
-            fail_span_id = fail_span.id
             # During this context, fail_span should be current
             assert current_span().id == fail_span.id
             raise ValueError("Expected error")
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 7662ad77..39513c1c 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -1437,9 +1437,6 @@ def test_span_set_current(with_memory_logger):
     """Test that span.set_current() makes the span accessible via current_span()."""
     init_test_logger(__name__)
 
-    # Store initial current span
-    initial_current = braintrust.current_span()
-
     # Start a span that can be set as current (default behavior)
     span1 = logger.start_span(name="test-span-1")
 
diff --git a/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py b/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py
index 5894c5b6..25c9cc95 100644
--- a/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py
+++ b/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py
@@ -145,9 +145,6 @@ async def run_async(self, *, args, tool_context):
 
         # Verify error was logged to span
         assert mock_span.log.called
-        # Check if error was logged
-        log_calls = [call for call in mock_span.log.call_args_list]
-        # Should have logged the error
 
 
 @pytest.mark.asyncio
@@ -316,9 +313,6 @@ async def test_real_context_loss_with_braintrust_spans():
     # Initialize a test logger
     logger = init_logger(project="test-context-loss")
 
-    # Track if we hit the context error
-    context_error_occurred = False
-
     async def problematic_generator():
         """Generator that creates a span and yields, simulating the Flow behavior."""
         from braintrust import start_span
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
index 81de2ea4..8ed8b2e0 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
+++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
@@ -325,15 +325,13 @@ async def run_multiple_streams():
 
         # First stream
         async with agent1.run_stream("Count from 1 to 3.") as result1:
-            full_text1 = ""
-            async for text in result1.stream_text(delta=True):
-                full_text1 += text
+            async for _ in result1.stream_text(delta=True):
+                pass
 
         # Second stream
         async with agent2.run_stream("Count from 1 to 3.") as result2:
-            full_text2 = ""
-            async for text in result2.stream_text(delta=True):
-                full_text2 += text
+            async for _ in result2.stream_text(delta=True):
+                pass
 
         return start
 

From d8d052e17853b263613421badc9d2c663c94a469 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 18:42:11 +0000
Subject: [PATCH 8/8] chore: restore wrongly removed code

---
 py/src/braintrust/db_fields.py |  9 +++++++++
 py/src/braintrust/framework.py | 22 ++++++++++++++++++----
 pyproject.toml                 |  2 +-
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/py/src/braintrust/db_fields.py b/py/src/braintrust/db_fields.py
index 6fd95df4..a89b9710 100644
--- a/py/src/braintrust/db_fields.py
+++ b/py/src/braintrust/db_fields.py
@@ -1,12 +1,21 @@
 TRANSACTION_ID_FIELD = "_xact_id"
 OBJECT_DELETE_FIELD = "_object_delete"
+CREATED_FIELD = "created"
+ID_FIELD = "id"
 
 IS_MERGE_FIELD = "_is_merge"
+MERGE_PATHS_FIELD = "_merge_paths"
+ARRAY_DELETE_FIELD = "_array_delete"
 
 AUDIT_SOURCE_FIELD = "_audit_source"
 AUDIT_METADATA_FIELD = "_audit_metadata"
 VALID_SOURCES = ["app", "api", "external"]
 
+PARENT_ID_FIELD = "_parent_id"
+
+ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control"
+SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring"
+
 # Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to.
 OBJECT_ID_KEYS = (
     "experiment_id",
diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index e223bb97..040b869a 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -69,10 +69,8 @@ class bcolors:
     WARNING = "\033[93m"
     FAIL = "\033[91m"
     ENDC = "\033[0m"
-
-
-#     BOLD = "\033[1m"
-#     UNDERLINE = "\033[4m"
+    # BOLD = "\033[1m"
+    # UNDERLINE = "\033[4m"
 
 
 @dataclasses.dataclass
@@ -230,6 +228,17 @@ def parameters(self) -> ValidatedParameters | None:
         """
 
 
+class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]):
+    """
+    Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata.
+    """
+
+    input: Input
+    output: Output
+    expected: Output | None = None
+    metadata: Metadata | None = None
+
+
 OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]]
 
 
@@ -1686,6 +1695,11 @@ async def with_max_concurrency(coro):
             for trial_index in range(evaluator.trial_count):
                 tasks.append(asyncio.create_task(with_max_concurrency(run_evaluator_task(datum, trial_index))))
 
+    if not tasks:
+        eprint(
+            f"{bcolors.WARNING}Warning: no data rows found for evaluator '{evaluator.eval_name}'. The experiment will be empty.{bcolors.ENDC}"
+        )
+
     results = []
     for task in std_tqdm(tasks, desc=f"{evaluator.eval_name} (tasks)", position=position, disable=position is None):
         results.append(await task)
diff --git a/pyproject.toml b/pyproject.toml
index 31230cc9..212d6046 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,5 +23,5 @@ addopts = "--durations=3 --durations-min=0.1"
 
 [tool.vulture]
 paths = ["py/src"]
-ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id"] # pytest fixtures and deprecated-but-public API parameters
+ignore_names = ["with_simulate_login", "reset_id_generator_state", "dataset_record_id", "EvalScorerArgs", "CREATED_FIELD", "ID_FIELD", "MERGE_PATHS_FIELD", "ARRAY_DELETE_FIELD", "PARENT_ID_FIELD", "ASYNC_SCORING_CONTROL_FIELD", "SKIP_ASYNC_SCORING_FIELD"] # pytest fixtures, deprecated-but-public API, and protocol field constants
 min_confidence = 100