braintrustdata · ViaDézo1er / cedric (viadezo1er) · Mar 20, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 24, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -32,3 +32,8 @@ repos:
         args:
           - "-L"
           - "rouge,coo,couldn,unsecure,ontext,afterall,als"
+  - repo: https://github.com/jendrikseipp/vulture
+    rev: v2.15
+    hooks:
+      - id: vulture
+        pass_filenames: false
diff --git a/py/src/braintrust/cli/eval.py b/py/src/braintrust/cli/eval.py
@@ -246,7 +246,7 @@ def check_match(path_input, include_patterns, exclude_patterns):
 
 def collect_files(input_path):
     if os.path.isdir(input_path):
-        for root, dirs, files in os.walk(input_path):
+        for root, _, files in os.walk(input_path):
             for file in files:
                 fname = os.path.join(root, file)
                 if check_match(fname, INCLUDE, EXCLUDE):

diff --git a/py/src/braintrust/cli/install/logs.py b/py/src/braintrust/cli/install/logs.py
@@ -88,7 +88,6 @@ def get_events(stream):
         with ThreadPoolExecutor(8) as executor:
             events = executor.map(get_events, all_streams)
 
-        last_ts = None
         for stream, log in zip(all_streams, events):
             print(f"---- LOG STREAM: {stream['logStreamName']}")
             for event in log["events"]:

diff --git a/py/src/braintrust/db_fields.py b/py/src/braintrust/db_fields.py
@@ -1,21 +1,12 @@
 TRANSACTION_ID_FIELD = "_xact_id"
 OBJECT_DELETE_FIELD = "_object_delete"
-CREATED_FIELD = "created"
-ID_FIELD = "id"
 
 IS_MERGE_FIELD = "_is_merge"
-MERGE_PATHS_FIELD = "_merge_paths"
-ARRAY_DELETE_FIELD = "_array_delete"
 
 AUDIT_SOURCE_FIELD = "_audit_source"
 AUDIT_METADATA_FIELD = "_audit_metadata"
 VALID_SOURCES = ["app", "api", "external"]
 
-PARENT_ID_FIELD = "_parent_id"
-
-ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control"
-SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring"
-
 # Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to.
 OBJECT_ID_KEYS = (
     "experiment_id",

diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
@@ -62,15 +62,9 @@
 
 # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
 class bcolors:
-    HEADER = "\033[95m"
-    OKBLUE = "\033[94m"
-    OKCYAN = "\033[96m"
-    OKGREEN = "\033[92m"
     WARNING = "\033[93m"
     FAIL = "\033[91m"
     ENDC = "\033[0m"
-    BOLD = "\033[1m"
-    UNDERLINE = "\033[4m"
 
 
 @dataclasses.dataclass
@@ -203,7 +197,7 @@ def tags(self) -> Sequence[str]:
         """
 
     @abc.abstractmethod
-    def report_progress(self, progress: TaskProgressEvent) -> None:
+    def report_progress(self, _progress: TaskProgressEvent) -> None:
         """
         Report progress that will show up in the playground.
         """
@@ -228,17 +222,6 @@ def parameters(self) -> ValidatedParameters | None:
         """
 
 
-class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]):
-    """
-    Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata.
-    """
-
-    input: Input
-    output: Output
-    expected: Output | None = None
-    metadata: Metadata | None = None
-
-
 OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]]
 
 
@@ -459,7 +442,7 @@ class EvalResultWithSummary(SerializableDataClass, Generic[Input, Output]):
     summary: ExperimentSummary
     results: list[EvalResult[Input, Output]]
 
-    def _repr_pretty_(self, p, cycle):
+    def _repr_pretty_(self, p, _cycle):
         p.text(f'EvalResultWithSummary(summary="...", results=[...])')
 
 
@@ -850,7 +833,7 @@ async def EvalAsync(
     :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
     :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
     :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
-    that takes an `EvalScorerArgs` object and returns a `Score` object.
+    that takes `(input, output, expected)` arguments and returns a `Score` object.
     :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
     :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
     have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.
@@ -977,7 +960,7 @@ def Eval(
     :param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
     :param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
     :param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
-    that takes an `EvalScorerArgs` object and returns a `Score` object.
+    that takes `(input, output, expected)` arguments and returns a `Score` object.
     :param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
     :param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
     have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.

diff --git a/py/src/braintrust/http_headers.py b/py/src/braintrust/http_headers.py
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
@@ -1060,9 +1060,6 @@ def __init__(self, api_conn: LazyValue[HTTPConnection]):
         self.logger = logging.getLogger("braintrust")
         self.queue: "LogQueue[LazyValue[Dict[str, Any]]]" = LogQueue(maxsize=self.queue_maxsize)
 
-        # Counter for tracking overflow uploads (useful for testing)
-        self._overflow_upload_count = 0
-
         if not disable_atexit_flush:
             atexit.register(self._finalize)
 
@@ -1374,8 +1371,6 @@ def _submit_logs_request(self, items: Sequence[LogItemWithMeta], max_request_siz
             except Exception as e:
                 error = e
             if error is None and resp is not None and resp.ok:
-                if overflow_rows:
-                    self._overflow_upload_count += 1
                 return
             if error is None and resp is not None:
                 resp_errmsg = f"{resp.status_code}: {resp.text}"
@@ -1437,7 +1432,7 @@ def _register_dropped_item_count(self, num_items):
                 self._queue_drop_logging_state["last_logged_timestamp"] = time_now
 
     @staticmethod
-    def _write_payload_to_dir(payload_dir, payload, debug_logging_adjective=None):
+    def _write_payload_to_dir(payload_dir, payload):
         payload_file = os.path.join(payload_dir, f"payload_{time.time()}_{str(uuid.uuid4())[:8]}.json")
         try:
             os.makedirs(payload_dir, exist_ok=True)
@@ -2831,7 +2826,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
 # Note that this only checks properties that are expected of a complete event.
 # _validate_and_sanitize_experiment_log_partial_args should still be invoked
 # (after handling special fields like 'id').
-def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any], has_dataset: bool) -> Mapping[str, Any]:
+def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any]) -> Mapping[str, Any]:
     input = event.get("input")
     inputs = event.get("inputs")
     if (input is not None and inputs is not None) or (input is None and inputs is None):
@@ -3861,7 +3856,6 @@ def log(
                 metrics=metrics,
                 id=id,
             ),
-            self.dataset is not None,
         )
         span = self._start_span_impl(start_time=self.last_start_time, lookup_span_parent=False, **event)
         self.last_start_time = span.end()

diff --git a/py/src/braintrust/otel/test_distributed_tracing.py b/py/src/braintrust/otel/test_distributed_tracing.py
@@ -123,7 +123,6 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture):
     assert len(otel_spans) == 1, "Should have 1 OTEL span from Service B"
 
     # Get the spans
-    service_a_exported = bt_spans[0]
     service_b_exported = otel_spans[0]
 
     # Convert OTEL IDs to hex for comparison

diff --git a/py/src/braintrust/otel/test_otel_bt_integration.py b/py/src/braintrust/otel/test_otel_bt_integration.py
@@ -197,7 +197,6 @@ def test_mixed_otel_bt_tracing_with_otel_first(otel_fixture):
     s1_trace_id = format(s1.context.trace_id, "032x")
     s1_span_id = format(s1.context.span_id, "016x")
     s3_trace_id = format(s3.context.trace_id, "032x")
-    s3_span_id = format(s3.context.span_id, "016x")
 
     assert s1_trace_id == s2["root_span_id"]
     assert s1_trace_id == s3_trace_id

diff --git a/py/src/braintrust/parameters.py b/py/src/braintrust/parameters.py
@@ -63,13 +63,6 @@ def from_function_row(cls, row: dict[str, Any]) -> "RemoteEvalParameters":
             data=function_data.get("data") or {},
         )
 
-    def validate(self, data: Any) -> bool:
-        try:
-            validate_json_schema(data, self.schema)
-            return True
-        except ValueError:
-            return False
-
 
 def _pydantic_to_json_schema(model: Any) -> dict[str, Any]:
     """Convert a pydantic model to JSON schema."""

diff --git a/py/src/braintrust/queue.py b/py/src/braintrust/queue.py
@@ -32,7 +32,6 @@ def __init__(self, maxsize: int = 0):
         self._mutex = threading.Lock()
         self._queue: deque[T] = deque(maxlen=maxsize)
         self._has_items_event = threading.Event()
-        self._total_dropped = 0
         self._enforce_size_limit = False
 
     def enforce_queue_size_limit(self, enforce: bool) -> None:
@@ -68,7 +67,6 @@ def put(self, item: T) -> list[T]:
                 while len(self._queue) >= self.maxsize:
                     dropped_item = self._queue.popleft()
                     dropped.append(dropped_item)
-                    self._total_dropped += 1
                 self._queue.append(item)
 
             # Signal that items are available if queue was not empty before or item was added

diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
@@ -896,8 +896,6 @@ async def generator_with_finally() -> AsyncGenerator[int, None]:
             yield 1
             yield 2
         finally:
-            # What context do we have during cleanup?
-            cleanup_span = current_span()
             gen_span.end()
 
     # Consumer
@@ -1152,14 +1150,11 @@ def test_nested_spans_same_thread(test_logger, with_memory_logger):
 
         # Child span
         with start_span(name="child") as child_span:
-            child_id = child_span.id
-
             # Verify child is now current
             assert current_span().id == child_span.id
 
             # Grandchild span
             with start_span(name="grandchild") as grandchild_span:
-                grandchild_id = grandchild_span.id
                 assert current_span().id == grandchild_span.id
 
             # After grandchild closes, child should be current
@@ -1227,13 +1222,10 @@ def test_context_with_exception_propagation(test_logger, with_memory_logger):
     """
     Test that context is properly maintained during exception propagation.
     """
-    fail_span_id = None
 
     def failing_function():
-        nonlocal fail_span_id
         # Use context manager for proper span lifecycle
         with start_span(name="failing_span") as fail_span:
-            fail_span_id = fail_span.id
             # During this context, fail_span should be current
             assert current_span().id == fail_span.id
             raise ValueError("Expected error")

diff --git a/py/src/braintrust/test_http.py b/py/src/braintrust/test_http.py
@@ -404,17 +404,12 @@ def do_GET(self):
             session.mount("http://", adapter)
 
             errors = []
-            success_count = 0
             lock = threading.Lock()
 
             def make_request(i):
-                nonlocal success_count
                 try:
                     time.sleep(i * 0.005)  # Stagger requests
                     resp = session.get(f"{url}/test{i}")
-                    if resp.status_code == 200:
-                        with lock:
-                            success_count += 1
                     return resp.status_code
                 except Exception as e:
                     with lock:

diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
@@ -1437,9 +1437,6 @@ def test_span_set_current(with_memory_logger):
     """Test that span.set_current() makes the span accessible via current_span()."""
     init_test_logger(__name__)
 
-    # Store initial current span
-    initial_current = braintrust.current_span()
-
     # Start a span that can be set as current (default behavior)
     span1 = logger.start_span(name="test-span-1")
 

diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk/__init__.py
@@ -412,8 +412,5 @@ def _determine_llm_call_type(llm_request: Any, model_response: Any = None) -> st
         request_dict = cast(dict[str, Any], bt_safe_deep_copy(llm_request))
 
-        # Check if there are tools in the config
-        has_tools = bool(request_dict.get("config", {}).get("tools"))
-
         # Check the conversation history for function responses
         contents = request_dict.get("contents", [])
         has_function_response = False

diff --git a/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py b/py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py
@@ -145,9 +145,6 @@ async def run_async(self, *, args, tool_context):
 
         # Verify error was logged to span
         assert mock_span.log.called
-        # Check if error was logged
-        log_calls = [call for call in mock_span.log.call_args_list]
-        # Should have logged the error
 
 
 @pytest.mark.asyncio
@@ -316,9 +313,6 @@ async def test_real_context_loss_with_braintrust_spans():
     # Initialize a test logger
     logger = init_logger(project="test-context-loss")
 
-    # Track if we hit the context error
-    context_error_occurred = False
-
     async def problematic_generator():
         """Generator that creates a span and yields, simulating the Flow behavior."""
         from braintrust import start_span

diff --git a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
@@ -56,10 +56,10 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
+        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
             return FakeWorkflowRunResponse(input=execution_input.input, content="workflow-async")
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("WorkflowStarted", content=None)
             yield FakeEvent("StepStarted", content=None)
             yield FakeEvent("StepCompleted", content="hello ")
@@ -74,7 +74,7 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("StepCompleted", content="hello")
             yield FakeEvent("WorkflowCompleted", content="hello", metrics=FakeMetrics(), status="COMPLETED")
 
@@ -87,7 +87,7 @@ def __init__(self):
             self.name = name
             self.steps = ["first-step"]
 
-        def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
+        def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
             yield FakeEvent("WorkflowStarted", content=None)
             yield FakeEvent("StepCompleted", content="hello ")
             workflow_run_response.content = "world"
@@ -115,7 +115,7 @@ def __init__(self):
             self.steps = ["agent-step"]
             self.agent = WrappedAgent()
 
-        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
+        async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
             return await self.agent.arun(execution_input.input)
 
     return FakeWorkflow
@@ -128,7 +128,7 @@ def __init__(self):
             self.id = "workflow-agent-123"
             self.steps = ["agent-step"]
 
-        def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs):
+        def _execute_workflow_agent(self, user_input, session, execution_input, _run_context, stream=False, **kwargs):
             if stream:
 
                 def _stream():
@@ -143,7 +143,7 @@ def _stream():
                 return _stream()
             return FakeRunOutput(f"{user_input}-sync")
 
-        async def _aexecute_workflow_agent(self, user_input, run_context, execution_input, stream=False, **kwargs):
+        async def _aexecute_workflow_agent(self, user_input, _run_context, execution_input, stream=False, **kwargs):
             if stream:
 
                 async def _astream():

diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py
@@ -239,7 +239,6 @@ def __init__(self, msg_stream, span, request_start_time: float):
         super().__init__(msg_stream)
         self.__msg_stream = msg_stream
         self.__span = span
-        self.__metrics = {}
         self.__snapshot = None
         self.__request_start_time = request_start_time
         self.__time_to_first_token: float | None = None
@@ -358,10 +357,6 @@ def wrap_anthropic(client):
         return client
 
 
-def wrap_anthropic_client(client):
-    return wrap_anthropic(client)
-
-
 def _apply_anthropic_wrapper(client):
     """Apply tracing wrapper to an Anthropic client instance in-place."""
     wrapped = wrap_anthropic(client)