Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,8 @@ repos:
args:
- "-L"
- "rouge,coo,couldn,unsecure,ontext,afterall,als"
- repo: https://github.com/jendrikseipp/vulture
rev: v2.15
hooks:
- id: vulture
pass_filenames: false
2 changes: 1 addition & 1 deletion py/src/braintrust/cli/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def check_match(path_input, include_patterns, exclude_patterns):

def collect_files(input_path):
if os.path.isdir(input_path):
for root, dirs, files in os.walk(input_path):
for root, _, files in os.walk(input_path):
for file in files:
fname = os.path.join(root, file)
if check_match(fname, INCLUDE, EXCLUDE):
Expand Down
1 change: 0 additions & 1 deletion py/src/braintrust/cli/install/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def get_events(stream):
with ThreadPoolExecutor(8) as executor:
events = executor.map(get_events, all_streams)

last_ts = None
for stream, log in zip(all_streams, events):
print(f"---- LOG STREAM: {stream['logStreamName']}")
for event in log["events"]:
Expand Down
9 changes: 0 additions & 9 deletions py/src/braintrust/db_fields.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
TRANSACTION_ID_FIELD = "_xact_id"
OBJECT_DELETE_FIELD = "_object_delete"
CREATED_FIELD = "created"
ID_FIELD = "id"

IS_MERGE_FIELD = "_is_merge"
MERGE_PATHS_FIELD = "_merge_paths"
ARRAY_DELETE_FIELD = "_array_delete"

AUDIT_SOURCE_FIELD = "_audit_source"
AUDIT_METADATA_FIELD = "_audit_metadata"
VALID_SOURCES = ["app", "api", "external"]

PARENT_ID_FIELD = "_parent_id"

ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control"
SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring"

# Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to.
OBJECT_ID_KEYS = (
"experiment_id",
Expand Down
33 changes: 12 additions & 21 deletions py/src/braintrust/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,17 @@

# https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
class bcolors:
HEADER = "\033[95m"
OKBLUE = "\033[94m"
OKCYAN = "\033[96m"
OKGREEN = "\033[92m"
# HEADER = "\033[95m"
# OKBLUE = "\033[94m"
# OKCYAN = "\033[96m"
# OKGREEN = "\033[92m"
WARNING = "\033[93m"
FAIL = "\033[91m"
ENDC = "\033[0m"
BOLD = "\033[1m"
UNDERLINE = "\033[4m"


# BOLD = "\033[1m"
# UNDERLINE = "\033[4m"


@dataclasses.dataclass
Expand Down Expand Up @@ -203,7 +205,7 @@ def tags(self) -> Sequence[str]:
"""

@abc.abstractmethod
def report_progress(self, progress: TaskProgressEvent) -> None:
def report_progress(self, _progress: TaskProgressEvent) -> None:
"""
Report progress that will show up in the playground.
"""
Expand All @@ -228,17 +230,6 @@ def parameters(self) -> ValidatedParameters | None:
"""


class EvalScorerArgs(SerializableDataClass, Generic[Input, Output]):
"""
Arguments passed to an evaluator scorer. This includes the input, expected output, actual output, and metadata.
"""

input: Input
output: Output
expected: Output | None = None
metadata: Metadata | None = None


OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]]


Expand Down Expand Up @@ -459,7 +450,7 @@ class EvalResultWithSummary(SerializableDataClass, Generic[Input, Output]):
summary: ExperimentSummary
results: list[EvalResult[Input, Output]]

def _repr_pretty_(self, p, cycle):
def _repr_pretty_(self, p, _cycle):
p.text(f'EvalResultWithSummary(summary="...", results=[...])')


Expand Down Expand Up @@ -850,7 +841,7 @@ async def EvalAsync(
:param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
:param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
:param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
that takes an `EvalScorerArgs` object and returns a `Score` object.
that takes `(input, output, expected)` arguments and returns a `Score` object.
:param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
:param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.
Expand Down Expand Up @@ -977,7 +968,7 @@ def Eval(
:param data: Returns an iterator over the evaluation dataset. Each element of the iterator should be a `EvalCase`.
:param task: Runs the evaluation task on a single input. The `hooks` object can be used to add metadata to the evaluation.
:param scores: A list of scorers to evaluate the results of the task. Each scorer can be a Scorer object or a function
that takes an `EvalScorerArgs` object and returns a `Score` object.
that takes `(input, output, expected)` arguments and returns a `Score` object.
:param experiment_name: (Optional) Experiment name. If not specified, a name will be generated automatically.
:param trial_count: The number of times to run the evaluator per input. This is useful for evaluating applications that
have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the variance in the results.
Expand Down
4 changes: 0 additions & 4 deletions py/src/braintrust/http_headers.py

This file was deleted.

10 changes: 2 additions & 8 deletions py/src/braintrust/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,9 +1060,6 @@ def __init__(self, api_conn: LazyValue[HTTPConnection]):
self.logger = logging.getLogger("braintrust")
self.queue: "LogQueue[LazyValue[Dict[str, Any]]]" = LogQueue(maxsize=self.queue_maxsize)

# Counter for tracking overflow uploads (useful for testing)
self._overflow_upload_count = 0

if not disable_atexit_flush:
atexit.register(self._finalize)

Expand Down Expand Up @@ -1374,8 +1371,6 @@ def _submit_logs_request(self, items: Sequence[LogItemWithMeta], max_request_siz
except Exception as e:
error = e
if error is None and resp is not None and resp.ok:
if overflow_rows:
self._overflow_upload_count += 1
return
if error is None and resp is not None:
resp_errmsg = f"{resp.status_code}: {resp.text}"
Expand Down Expand Up @@ -1437,7 +1432,7 @@ def _register_dropped_item_count(self, num_items):
self._queue_drop_logging_state["last_logged_timestamp"] = time_now

@staticmethod
def _write_payload_to_dir(payload_dir, payload, debug_logging_adjective=None):
def _write_payload_to_dir(payload_dir, payload):
payload_file = os.path.join(payload_dir, f"payload_{time.time()}_{str(uuid.uuid4())[:8]}.json")
try:
os.makedirs(payload_dir, exist_ok=True)
Expand Down Expand Up @@ -2831,7 +2826,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
# Note that this only checks properties that are expected of a complete event.
# _validate_and_sanitize_experiment_log_partial_args should still be invoked
# (after handling special fields like 'id').
def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any], has_dataset: bool) -> Mapping[str, Any]:
def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any]) -> Mapping[str, Any]:
input = event.get("input")
inputs = event.get("inputs")
if (input is not None and inputs is not None) or (input is None and inputs is None):
Expand Down Expand Up @@ -3861,7 +3856,6 @@ def log(
metrics=metrics,
id=id,
),
self.dataset is not None,
)
span = self._start_span_impl(start_time=self.last_start_time, lookup_span_parent=False, **event)
self.last_start_time = span.end()
Expand Down
1 change: 0 additions & 1 deletion py/src/braintrust/otel/test_distributed_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture):
assert len(otel_spans) == 1, "Should have 1 OTEL span from Service B"

# Get the spans
service_a_exported = bt_spans[0]
service_b_exported = otel_spans[0]

# Convert OTEL IDs to hex for comparison
Expand Down
1 change: 0 additions & 1 deletion py/src/braintrust/otel/test_otel_bt_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ def test_mixed_otel_bt_tracing_with_otel_first(otel_fixture):
s1_trace_id = format(s1.context.trace_id, "032x")
s1_span_id = format(s1.context.span_id, "016x")
s3_trace_id = format(s3.context.trace_id, "032x")
s3_span_id = format(s3.context.span_id, "016x")

assert s1_trace_id == s2["root_span_id"]
assert s1_trace_id == s3_trace_id
Expand Down
7 changes: 0 additions & 7 deletions py/src/braintrust/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,6 @@ def from_function_row(cls, row: dict[str, Any]) -> "RemoteEvalParameters":
data=function_data.get("data") or {},
)

def validate(self, data: Any) -> bool:
try:
validate_json_schema(data, self.schema)
return True
except ValueError:
return False


def _pydantic_to_json_schema(model: Any) -> dict[str, Any]:
"""Convert a pydantic model to JSON schema."""
Expand Down
2 changes: 0 additions & 2 deletions py/src/braintrust/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def __init__(self, maxsize: int = 0):
self._mutex = threading.Lock()
self._queue: deque[T] = deque(maxlen=maxsize)
self._has_items_event = threading.Event()
self._total_dropped = 0
self._enforce_size_limit = False

def enforce_queue_size_limit(self, enforce: bool) -> None:
Expand Down Expand Up @@ -68,7 +67,6 @@ def put(self, item: T) -> list[T]:
while len(self._queue) >= self.maxsize:
dropped_item = self._queue.popleft()
dropped.append(dropped_item)
self._total_dropped += 1
self._queue.append(item)

# Signal that items are available if queue was not empty before or item was added
Expand Down
8 changes: 0 additions & 8 deletions py/src/braintrust/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,8 +896,6 @@ async def generator_with_finally() -> AsyncGenerator[int, None]:
yield 1
yield 2
finally:
# What context do we have during cleanup?
cleanup_span = current_span()
gen_span.end()

# Consumer
Expand Down Expand Up @@ -1152,14 +1150,11 @@ def test_nested_spans_same_thread(test_logger, with_memory_logger):

# Child span
with start_span(name="child") as child_span:
child_id = child_span.id

# Verify child is now current
assert current_span().id == child_span.id

# Grandchild span
with start_span(name="grandchild") as grandchild_span:
grandchild_id = grandchild_span.id
assert current_span().id == grandchild_span.id

# After grandchild closes, child should be current
Expand Down Expand Up @@ -1227,13 +1222,10 @@ def test_context_with_exception_propagation(test_logger, with_memory_logger):
"""
Test that context is properly maintained during exception propagation.
"""
fail_span_id = None

def failing_function():
nonlocal fail_span_id
# Use context manager for proper span lifecycle
with start_span(name="failing_span") as fail_span:
fail_span_id = fail_span.id
# During this context, fail_span should be current
assert current_span().id == fail_span.id
raise ValueError("Expected error")
Expand Down
5 changes: 0 additions & 5 deletions py/src/braintrust/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,17 +404,12 @@ def do_GET(self):
session.mount("http://", adapter)

errors = []
success_count = 0
lock = threading.Lock()

def make_request(i):
nonlocal success_count
try:
time.sleep(i * 0.005) # Stagger requests
resp = session.get(f"{url}/test{i}")
if resp.status_code == 200:
with lock:
success_count += 1
return resp.status_code
except Exception as e:
with lock:
Expand Down
3 changes: 0 additions & 3 deletions py/src/braintrust/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1437,9 +1437,6 @@ def test_span_set_current(with_memory_logger):
"""Test that span.set_current() makes the span accessible via current_span()."""
init_test_logger(__name__)

# Store initial current span
initial_current = braintrust.current_span()

# Start a span that can be set as current (default behavior)
span1 = logger.start_span(name="test-span-1")

Expand Down
2 changes: 0 additions & 2 deletions py/src/braintrust/wrappers/adk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,6 @@ def _determine_llm_call_type(llm_request: Any, model_response: Any = None) -> st
request_dict = cast(dict[str, Any], bt_safe_deep_copy(llm_request))

# Check if there are tools in the config
has_tools = bool(request_dict.get("config", {}).get("tools"))

# Check the conversation history for function responses
contents = request_dict.get("contents", [])
has_function_response = False
Expand Down
6 changes: 0 additions & 6 deletions py/src/braintrust/wrappers/adk/test_adk_mcp_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ async def run_async(self, *, args, tool_context):

# Verify error was logged to span
assert mock_span.log.called
# Check if error was logged
log_calls = [call for call in mock_span.log.call_args_list]
# Should have logged the error


@pytest.mark.asyncio
Expand Down Expand Up @@ -316,9 +313,6 @@ async def test_real_context_loss_with_braintrust_spans():
# Initialize a test logger
logger = init_logger(project="test-context-loss")

# Track if we hit the context error
context_error_occurred = False

async def problematic_generator():
"""Generator that creates a span and yields, simulating the Flow behavior."""
from braintrust import start_span
Expand Down
14 changes: 7 additions & 7 deletions py/src/braintrust/wrappers/agno/_test_agno_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ def __init__(self):
self.name = name
self.steps = ["first-step"]

async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
return FakeWorkflowRunResponse(input=execution_input.input, content="workflow-async")

def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
yield FakeEvent("WorkflowStarted", content=None)
yield FakeEvent("StepStarted", content=None)
yield FakeEvent("StepCompleted", content="hello ")
Expand All @@ -74,7 +74,7 @@ def __init__(self):
self.name = name
self.steps = ["first-step"]

def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
yield FakeEvent("StepCompleted", content="hello")
yield FakeEvent("WorkflowCompleted", content="hello", metrics=FakeMetrics(), status="COMPLETED")

Expand All @@ -87,7 +87,7 @@ def __init__(self):
self.name = name
self.steps = ["first-step"]

def _execute_stream(self, session, execution_input, workflow_run_response, run_context=None):
def _execute_stream(self, session, execution_input, workflow_run_response, _run_context=None):
yield FakeEvent("WorkflowStarted", content=None)
yield FakeEvent("StepCompleted", content="hello ")
workflow_run_response.content = "world"
Expand Down Expand Up @@ -115,7 +115,7 @@ def __init__(self):
self.steps = ["agent-step"]
self.agent = WrappedAgent()

async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, run_context=None):
async def _aexecute(self, session_id, user_id, execution_input, workflow_run_response, _run_context=None):
return await self.agent.arun(execution_input.input)

return FakeWorkflow
Expand All @@ -128,7 +128,7 @@ def __init__(self):
self.id = "workflow-agent-123"
self.steps = ["agent-step"]

def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs):
def _execute_workflow_agent(self, user_input, session, execution_input, _run_context, stream=False, **kwargs):
if stream:

def _stream():
Expand All @@ -143,7 +143,7 @@ def _stream():
return _stream()
return FakeRunOutput(f"{user_input}-sync")

async def _aexecute_workflow_agent(self, user_input, run_context, execution_input, stream=False, **kwargs):
async def _aexecute_workflow_agent(self, user_input, _run_context, execution_input, stream=False, **kwargs):
if stream:

async def _astream():
Expand Down
5 changes: 0 additions & 5 deletions py/src/braintrust/wrappers/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ def __init__(self, msg_stream, span, request_start_time: float):
super().__init__(msg_stream)
self.__msg_stream = msg_stream
self.__span = span
self.__metrics = {}
self.__snapshot = None
self.__request_start_time = request_start_time
self.__time_to_first_token: float | None = None
Expand Down Expand Up @@ -358,10 +357,6 @@ def wrap_anthropic(client):
return client


def wrap_anthropic_client(client):
return wrap_anthropic(client)


def _apply_anthropic_wrapper(client):
"""Apply tracing wrapper to an Anthropic client instance in-place."""
wrapped = wrap_anthropic(client)
Expand Down
Loading