
Commit 73beaee

Matt Perpick and claude committed
style: fix ruff format issues
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 323299c commit 73beaee

3 files changed

Lines changed: 211 additions & 1 deletion


py/bench_compare.py

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
"""Side-by-side benchmark. Run on main, then on the perf branch."""
import os, sys, time
os.environ["BRAINTRUST_DISABLE_ATEXIT_FLUSH"] = "true"
sys.path.insert(0, "src")

from braintrust.logger import (
    BraintrustState, SpanImpl, _MemoryBackgroundLogger, SpanObjectTypeV3,
    stringify_with_overflow_meta,
)
from braintrust.merge_row_batch import merge_row_batch
from braintrust.bt_json import bt_safe_deep_copy
from braintrust.util import LazyValue

PID = LazyValue(lambda: "test", use_mutex=False)
PID.get()

MED = {
    "input": {"messages": [{"role": "user", "content": "What is 2+2?"},
                           {"role": "assistant", "content": "4"}]},
    "output": {"result": "The answer is 4", "confidence": 0.95},
    "scores": {"accuracy": 0.9, "relevance": 0.85},
    "metadata": {"model": "gpt-4", "temperature": 0.7, "max_tokens": 100},
}
LARGE = {
    "input": {"messages": [{"role": "user", "content": f"msg {i}" * 20} for i in range(10)]},
    "output": {"result": "x" * 5000,
               "details": {f"k{i}": f"v{i}" * 10 for i in range(50)}},
    "scores": {f"s{i}": i / 100.0 for i in range(20)},
    "metadata": {f"m{i}": f"v{i}" * 5 for i in range(30)},
}

def fresh():
    s = BraintrustState()
    ml = _MemoryBackgroundLogger()
    s._override_bg_logger.logger = ml
    return s, ml

def bench(label, fn, N):
    # warmup
    fn()
    t0 = time.perf_counter()
    for _ in range(N):
        fn()
    us = (time.perf_counter() - t0) / N * 1e6
    print(f"  {label:40s} {us:8.1f} us/op")
    return us

N = 5000
print(f"N={N}\n")

# -- user thread --
print("User thread:")

s, ml = fresh()
bench("start_span (medium)", lambda: SpanImpl(
    parent_object_type=SpanObjectTypeV3.PROJECT_LOGS, parent_object_id=PID,
    parent_compute_object_metadata_args=None, parent_span_ids=None,
    name="b", state=s, event=dict(MED), lookup_span_parent=False), N)

s, ml = fresh()
bench("start_span (large)", lambda: SpanImpl(
    parent_object_type=SpanObjectTypeV3.PROJECT_LOGS, parent_object_id=PID,
    parent_compute_object_metadata_args=None, parent_span_ids=None,
    name="b", state=s, event=dict(LARGE), lookup_span_parent=False), N)

s, ml = fresh()
def _tree():
    root = SpanImpl(parent_object_type=SpanObjectTypeV3.PROJECT_LOGS, parent_object_id=PID,
                    parent_compute_object_metadata_args=None, parent_span_ids=None,
                    name="root", state=s, event=dict(MED), lookup_span_parent=False)
    c = root.start_span(name="child", input="x", output="y")
    c.end(); root.end()
bench("root + child + end (medium)", _tree, N)

s, ml = fresh()
span = SpanImpl(parent_object_type=SpanObjectTypeV3.PROJECT_LOGS, parent_object_id=PID,
                parent_compute_object_metadata_args=None, parent_span_ids=None,
                name="b", state=s, lookup_span_parent=False)
bench("span.log (medium)", lambda: span.log(**MED), N)

# -- deep copy --
print("\nDeep copy:")
bench("bt_safe_deep_copy (medium)", lambda: bt_safe_deep_copy(MED), N)
bench("bt_safe_deep_copy (large)", lambda: bt_safe_deep_copy(LARGE), N)

# -- flush thread --
print("\nFlush thread:")
for count in (1000, 5000):
    s2, ml2 = fresh()
    for _ in range(count):
        SpanImpl(parent_object_type=SpanObjectTypeV3.PROJECT_LOGS, parent_object_id=PID,
                 parent_compute_object_metadata_args=None, parent_span_ids=None,
                 name="b", state=s2, event=dict(MED), lookup_span_parent=False)
    items = ml2.logs[:]
    def _flush(items=items):
        unwrapped = [it.get() for it in items]
        merged = merge_row_batch(unwrapped)
        _ = [stringify_with_overflow_meta(m) for m in merged]
    t0 = time.perf_counter()
    _flush()
    elapsed = time.perf_counter() - t0
    print(f"  flush {count} items (medium)  {elapsed/count*1e6:8.1f} us/item")

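Note: the docstring says to run this once on main and once on the perf branch. A minimal sketch of how the two runs might then be compared, assuming each run's stdout was saved to a file; the script name compare_runs.py and the argument file names are hypothetical, and the parsed line format is the one printed by bench() above:

# compare_runs.py -- hypothetical helper, not part of this commit.
# Parses the "  <label>  <value> us/op" lines printed by bench() and
# reports the per-label speedup of the perf branch over main.
import re
import sys

LINE = re.compile(r"^\s*(.+?)\s+([\d.]+) us/op$")

def parse(path):
    results = {}
    with open(path) as f:
        for line in f:
            m = LINE.match(line.rstrip("\n"))
            if m:
                results[m.group(1).strip()] = float(m.group(2))
    return results

main_us = parse(sys.argv[1])  # e.g. saved output of the run on main
perf_us = parse(sys.argv[2])  # e.g. saved output of the run on the perf branch
for label, base in main_us.items():
    if label in perf_us:
        new = perf_us[label]
        print(f"{label:40s} {base:8.1f} -> {new:8.1f} us/op ({base / new:.2f}x)")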
py/bench_e2e.py

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
"""End-to-end CPU time benchmark simulating real usage patterns.

Measures total CPU cost of a realistic workload:
- Init a logger
- Create spans with nested children
- Log inputs/outputs/scores
- Flush everything

Run: cd py && python bench_e2e.py
"""
import os, sys, time
os.environ["BRAINTRUST_DISABLE_ATEXIT_FLUSH"] = "true"
sys.path.insert(0, "src")

from braintrust.logger import (
    BraintrustState, SpanImpl, _MemoryBackgroundLogger, SpanObjectTypeV3,
    stringify_with_overflow_meta,
)
from braintrust.merge_row_batch import merge_row_batch
from braintrust.util import LazyValue


def run_workload(num_requests):
    """Simulate num_requests LLM calls, each with a root span + child span + logging."""
    state = BraintrustState()
    ml = _MemoryBackgroundLogger()
    state._override_bg_logger.logger = ml
    pid = LazyValue(lambda: "proj-abc123", use_mutex=False)
    pid.get()

    # Phase 1: create spans and log (user thread work)
    t_start = time.perf_counter()

    for i in range(num_requests):
        # Root span: the overall request
        root = SpanImpl(
            parent_object_type=SpanObjectTypeV3.PROJECT_LOGS,
            parent_object_id=pid,
            parent_compute_object_metadata_args=None,
            parent_span_ids=None,
            name="handle_request",
            state=state,
            event={
                "input": {
                    "messages": [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": f"Question {i}: What is {i} + {i}?"},
                    ]
                },
                "metadata": {"user_id": f"user_{i % 100}", "session_id": "sess_abc"},
            },
            lookup_span_parent=False,
        )

        # Child span: LLM call
        child = root.start_span(
            name="llm_call",
            input={"model": "gpt-4", "temperature": 0.7, "max_tokens": 500},
        )

        # Log the LLM response
        child.log(
            output={
                "choices": [{"message": {"role": "assistant", "content": f"The answer is {i*2}."}}],
                "usage": {"prompt_tokens": 50, "completion_tokens": 20, "total_tokens": 70},
            },
            metrics={"latency": 0.234, "tokens_per_second": 85.5},
        )
        child.end()

        # Log scores on root
        root.log(
            output=f"The answer is {i*2}.",
            scores={"accuracy": 0.95, "relevance": 0.88},
        )
        root.end()

    t_user = time.perf_counter() - t_start

    # Phase 2: flush (background thread work)
    t_flush_start = time.perf_counter()
    items = ml.logs[:]
    unwrapped = [it.get() for it in items]
    merged = merge_row_batch(unwrapped)
    stringified = [stringify_with_overflow_meta(m) for m in merged]
    t_flush = time.perf_counter() - t_flush_start

    return t_user, t_flush, len(items), len(merged)


# Warmup
run_workload(10)

print("End-to-end CPU time benchmark")
print("=" * 60)

for num_requests in [100, 500, 1000, 5000]:
    t_user, t_flush, num_items, num_merged = run_workload(num_requests)
    t_total = t_user + t_flush

    print(f"\n{num_requests} requests ({num_items} log items -> {num_merged} merged):")
    print(f"  User thread: {t_user*1000:8.1f} ms ({t_user/num_requests*1e6:6.1f} us/req)")
    print(f"  Flush:       {t_flush*1000:8.1f} ms ({t_flush/num_merged*1e6:6.1f} us/item)")
    print(f"  Total CPU:   {t_total*1000:8.1f} ms ({t_total/num_requests*1e6:6.1f} us/req)")

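Note: if the per-request numbers reported above need attribution to specific functions, the whole script can be run under the standard-library profiler. A sketch, not part of this commit (the companion file name is hypothetical; run it from the py/ directory so bench_e2e.py is found):

# profile_e2e.py -- hypothetical companion script, not part of this commit.
# Runs bench_e2e.py under cProfile to attribute the reported CPU time
# to individual functions in the logger.
import cProfile
import pstats
import runpy

profiler = cProfile.Profile()
profiler.enable()
runpy.run_path("bench_e2e.py", run_name="__main__")
profiler.disable()

# Show the 15 functions with the highest cumulative time.
pstats.Stats(profiler).sort_stats("cumulative").print_stats(15)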
py/src/braintrust/logger.py

Lines changed: 5 additions & 1 deletion
@@ -4341,7 +4341,7 @@ def log_internal(self, event: dict[str, Any] | None = None, internal_data: dict[
         # Cache the object_id_fields result since it's constant per span.
         # Uses a LazyValue to defer evaluation until flush time (when
         # parent_object_id may still be resolving).
-        if not hasattr(self, '_cached_object_id_fields'):
+        if not hasattr(self, "_cached_object_id_fields"):
             parent_object_type = self.parent_object_type
             parent_object_id = self.parent_object_id
             exporter = _get_exporter()
@@ -4355,13 +4355,15 @@ def log_internal(self, event: dict[str, Any] | None = None, internal_data: dict[
         cached_fields = self._cached_object_id_fields

         if lazy_partial_record:
+
             def compute_record() -> dict[str, Any]:
                 return dict(
                     **serializable_partial_record,
                     **{k: v.get() for k, v in lazy_partial_record.items()},
                     **cached_fields.get(),
                 )
         else:
+
             def compute_record() -> dict[str, Any]:
                 return dict(
                     **serializable_partial_record,
@@ -4649,8 +4651,10 @@ def split_logging_data(
     serializable_partial_record: dict[str, Any] = {}
     for k2, v2 in sanitized_and_internal_data.items():
         if isinstance(v2, BraintrustStream):
+
             def make_final_value_callback(v2):
                 return LazyValue(lambda: v2.copy().final_value(), use_mutex=False)
+
             lazy_partial_record[k2] = make_final_value_callback(v2)
         else:
             serializable_partial_record[k2] = v2

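Context for the first hunk: the hasattr guard is a compute-once-per-span cache, and the cached value is itself a LazyValue so the parent object id is only resolved at flush time. A standalone sketch of that pattern, with illustrative names rather than the real braintrust internals:

# Illustrative sketch of the caching pattern in the first hunk above;
# all names here are hypothetical stand-ins, not the real logger code.
from typing import Any, Callable

class LazyValue:
    """Simplified stand-in for braintrust.util.LazyValue: compute once, cache."""
    def __init__(self, fn: Callable[[], Any], use_mutex: bool = False):
        self._fn = fn
        self._computed = False
        self._value = None

    def get(self) -> Any:
        if not self._computed:
            self._value = self._fn()
            self._computed = True
        return self._value

class Span:
    def object_id_fields(self) -> LazyValue:
        # Compute-once-per-span: the fields never change, but the parent
        # object id may still be resolving when the span is created, so
        # defer the lookup itself until the first .get() (i.e. flush time).
        if not hasattr(self, "_cached_object_id_fields"):
            self._cached_object_id_fields = LazyValue(
                lambda: {"project_id": self._resolve_parent_id()}, use_mutex=False
            )
        return self._cached_object_id_fields

    def _resolve_parent_id(self) -> str:
        return "proj-123"  # placeholder for the real lazy resolution

span = Span()
fields = span.object_id_fields()  # cheap: nothing resolved yet
print(fields.get())               # resolution happens here, exactly once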
0 commit comments