Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
litellm_pr_draft.md
__pycache__/
*.py[cod]
*$py.class
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Your App → Token0 Proxy → [Analyze → Classify → Route → Transform →
Database (logs every optimization decision + savings)
```

Token0 applies **10 optimizations** automatically:
Token0 applies **11 optimizations** automatically:

### Core Optimizations (Free Tier)

Expand All @@ -56,6 +56,8 @@ Token0 applies **10 optimizations** automatically:

**10. Video Optimization** — Automatically extract keyframes from video at 1fps, deduplicate similar consecutive frames using QJL perceptual hashing, detect scene changes via pixel-level diff, and run each keyframe through the full image optimization pipeline. A 60-second video at 30fps (1,800 frames) reduces to ~10 keyframes before being sent to the LLM. **13-45% savings on local models; ~83% projected savings on GPT-4.1.** Optional CLIP-based query-frame scoring (Layer 2) ranks frames by relevance to the user's prompt.

**11. Saliency-Based ROI Cropping** — Detects which region of an image the prompt is asking about and crops to that region before sending to the LLM. For example, "What's the total on this invoice?" crops to the bottom 40% of the image, and "Read the header" crops to the top 25%. Uses rule-based spatial keyword matching with no ML dependencies. Delivers ~60% additional pixel reduction on document and form images before any other optimization runs.

---

## Benchmarks
Expand Down Expand Up @@ -482,6 +484,10 @@ curl http://localhost:8000/v1/usage
}
```

### Savings Dashboard

Open `http://localhost:8000/dashboard` in your browser for a live view of total requests, tokens saved, cost saved, and per-optimization breakdown. Auto-refreshes every 10 seconds.

### Run Benchmarks Yourself

```bash
Expand Down
141 changes: 141 additions & 0 deletions test_token0_litellm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Tests for the Token0 LiteLLM CustomLogger integration.

These tests verify the Token0Hook contract without making real API calls.
Token0 is installed separately: pip install token0
"""

import pytest
from unittest.mock import patch


def _make_image_message(url: str = "data:image/jpeg;base64,/9j/fake") -> dict:
return {
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": url}},
],
}


# ---------------------------------------------------------------------------
# Import guard — skip entire module if token0 is not installed
# ---------------------------------------------------------------------------

# Runs at import (collection) time. The tests below import Token0Hook from
# token0.litellm_hook themselves rather than going through this binding.
token0 = pytest.importorskip("token0", reason="token0 not installed")


# ---------------------------------------------------------------------------
# Hook contract tests
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_token0_hook_passthrough_for_non_completion():
    """A call_type other than 'completion' must return the same data object."""
    from token0.litellm_hook import Token0Hook

    hook = Token0Hook()
    request = {"messages": [_make_image_message()], "model": "gpt-4o"}

    # "embedding" stands in for any non-completion call type.
    outcome = await hook.async_pre_call_hook(
        user_api_key_dict={},
        cache=None,
        data=request,
        call_type="embedding",
    )

    # Identity, not equality: the hook must hand back the very same dict.
    assert outcome is request


@pytest.mark.asyncio
async def test_token0_hook_passthrough_for_empty_messages():
    """An empty ``messages`` list must make the hook return the same data object."""
    from token0.litellm_hook import Token0Hook

    hook = Token0Hook()
    request = {"messages": [], "model": "gpt-4o"}

    outcome = await hook.async_pre_call_hook(
        user_api_key_dict={},
        cache=None,
        data=request,
        call_type="completion",
    )

    # Identity, not equality: the hook must hand back the very same dict.
    assert outcome is request


@pytest.mark.asyncio
async def test_token0_hook_text_only_passthrough():
    """Text-only messages must pass through with zero overhead.

    The messages are compared against a snapshot taken before the call, so
    the test also fails if the hook mutates the message list in place.
    """
    import copy

    from token0.litellm_hook import Token0Hook

    hook = Token0Hook()
    original_messages = [{"role": "user", "content": "Hello, what is 2+2?"}]
    # ``data`` holds the same list object as ``original_messages``; comparing
    # the result against that list alone could not detect in-place changes.
    expected_messages = copy.deepcopy(original_messages)
    data = {"messages": original_messages, "model": "gpt-4o"}

    result = await hook.async_pre_call_hook(
        user_api_key_dict={}, cache=None, data=data, call_type="completion"
    )

    assert result["messages"] == expected_messages


@pytest.mark.asyncio
async def test_token0_hook_attaches_stats_metadata():
    """Optimizer stats must be exposed under ``data['metadata']['token0']``.

    ``optimize_messages`` is patched to return canned stats, so no real
    optimization runs during this test.
    """
    from token0.litellm_hook import Token0Hook

    hook = Token0Hook()
    image_messages = [_make_image_message()]
    request = {"messages": image_messages, "model": "gpt-4o"}

    canned_stats = {
        "tokens_before": 765,
        "tokens_after": 85,
        "tokens_saved": 680,
        "optimizations": ["prompt-aware→low detail"],
        "recommended_model": None,
    }

    optimizer_stub = patch(
        "token0.litellm_hook.optimize_messages",
        return_value=(image_messages, canned_stats),
    )
    with optimizer_stub:
        result = await hook.async_pre_call_hook(
            user_api_key_dict={},
            cache=None,
            data=request,
            call_type="completion",
        )

    assert "metadata" in result
    assert "token0" in result["metadata"]
    assert result["metadata"]["token0"]["tokens_saved"] == 680


@pytest.mark.asyncio
async def test_token0_hook_remote_url_passthrough():
    """An image part with a remote https URL must keep that URL after the hook.

    ``optimize_messages`` is patched to echo the messages back, so this
    checks that the hook itself leaves the URL alone.
    """
    from token0.litellm_hook import Token0Hook

    hook = Token0Hook()
    remote_url = "https://example.com/photo.jpg"
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this"},
            {"type": "image_url", "image_url": {"url": remote_url}},
        ],
    }
    messages = [user_message]
    request = {"messages": messages, "model": "gpt-4o"}

    zero_stats = {
        "tokens_before": 0,
        "tokens_after": 0,
        "tokens_saved": 0,
        "optimizations": [],
        "recommended_model": None,
    }

    optimizer_stub = patch(
        "token0.litellm_hook.optimize_messages",
        return_value=(messages, zero_stats),
    )
    with optimizer_stub:
        result = await hook.async_pre_call_hook(
            user_api_key_dict={},
            cache=None,
            data=request,
            call_type="completion",
        )

    first_content = result["messages"][0]["content"]
    image_parts = [part for part in first_content if part.get("type") == "image_url"]
    assert image_parts[0]["image_url"]["url"] == remote_url
125 changes: 125 additions & 0 deletions tests/test_saliency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Tests for saliency-based ROI cropping."""

from PIL import Image

from token0.optimization.saliency import SaliencyResult, apply_saliency_crop, detect_roi


def _make_image(w: int = 800, h: int = 1000) -> Image.Image:
    """Create a solid light-gray RGB image of ``w`` x ``h`` pixels."""
    fill = (200, 200, 200)
    size = (w, h)
    return Image.new("RGB", size, color=fill)


# ---------------------------------------------------------------------------
# detect_roi — keyword matching
# ---------------------------------------------------------------------------


def test_footer_keyword_crops_bottom():
    """An invoice-total prompt must give a crop in the lower half, ending at the bottom."""
    image = _make_image()
    roi = detect_roi("What is the total amount on this invoice?", image)

    assert roi.cropped is True
    assert roi.matched_keyword is not None

    # The crop must start below the vertical midpoint and reach the bottom edge.
    _, crop_top, _, crop_bottom = roi.crop_box
    assert crop_top > image.height * 0.5
    assert crop_bottom == image.height


def test_header_keyword_crops_top():
    """A header prompt must give a crop that starts at the top and ends in the upper half."""
    image = _make_image()
    roi = detect_roi("Read the header text", image)

    assert roi.cropped is True

    # Horizontal extents are not checked by this test.
    _, crop_top, _, crop_bottom = roi.crop_box
    assert crop_top == 0
    assert crop_bottom < image.height * 0.5


def test_top_right_keyword():
    """A document-date prompt must crop from the top edge with the left side trimmed."""
    image = _make_image()
    roi = detect_roi("What is the date on this document?", image)

    assert roi.cropped is True

    crop_left, crop_top, _, _ = roi.crop_box
    assert crop_left > 0  # left part removed, i.e. the crop sits towards the right
    assert crop_top == 0


def test_bottom_right_keyword():
    """A signature prompt must give a crop in the lower half, ending at the bottom."""
    image = _make_image()
    roi = detect_roi("What does the signature say at the bottom right?", image)

    assert roi.cropped is True

    # "signature" matches footer rule (full-width bottom strip) — still a valid crop,
    # so only the vertical extent is asserted here.
    _, crop_top, _, crop_bottom = roi.crop_box
    assert crop_top > image.height * 0.5
    assert crop_bottom == image.height


def test_no_match_returns_not_cropped():
    """A generic prompt must give no crop, no crop box and zero savings."""
    roi = detect_roi("Describe this image", _make_image())

    assert roi.cropped is False
    assert roi.crop_box is None
    assert roi.savings_pct == 0.0


def test_empty_prompt_returns_not_cropped():
    """An empty prompt string must not trigger a crop."""
    roi = detect_roi("", _make_image())

    assert roi.cropped is False


def test_tiny_image_skipped():
    """A 100x100 image must not be cropped for the prompt "What is the total?"."""
    small_image = _make_image(100, 100)
    roi = detect_roi("What is the total?", small_image)

    assert roi.cropped is False


def test_savings_pct_is_meaningful():
    """A header crop must report ``savings_pct`` of at least 0.20."""
    roi = detect_roi("Read the header", _make_image())

    assert roi.cropped is True
    assert roi.savings_pct >= 0.20


# ---------------------------------------------------------------------------
# apply_saliency_crop
# ---------------------------------------------------------------------------


def test_crop_produces_correct_dimensions():
    """The cropped image's size must equal the width and height of the crop box."""
    image = _make_image(800, 1000)
    roi = detect_roi("What is the total?", image)
    assert roi.cropped

    cropped = apply_saliency_crop(image, roi)

    box_left, box_top, box_right, box_bottom = roi.crop_box
    expected_size = (box_right - box_left, box_bottom - box_top)
    assert cropped.size == expected_size


def test_no_crop_returns_original():
    """With a not-cropped result, apply_saliency_crop must return the input object itself."""
    image = _make_image()
    no_crop = SaliencyResult(
        cropped=False,
        crop_box=None,
        matched_keyword=None,
        savings_pct=0.0,
    )

    assert apply_saliency_crop(image, no_crop) is image


# ---------------------------------------------------------------------------
# Integration: detect_roi → apply_saliency_crop produces smaller image
# ---------------------------------------------------------------------------


def test_cropped_image_is_smaller():
    """Detecting and then applying a crop must give an image with fewer pixels."""
    image = _make_image(800, 1000)
    roi = detect_roi("What is the invoice total?", image)
    assert roi.cropped

    cropped = apply_saliency_crop(image, roi)

    pixels_before = image.width * image.height
    pixels_after = cropped.width * cropped.height
    assert pixels_after < pixels_before


def test_center_keyword():
    """A "center" prompt must give a crop box strictly inside the image on all four sides."""
    image = _make_image()
    roi = detect_roi("What is in the center of this image?", image)

    assert roi.cropped is True

    box_left, box_top, box_right, box_bottom = roi.crop_box
    assert box_left > 0 and box_top > 0
    assert box_right < image.width and box_bottom < image.height
12 changes: 12 additions & 0 deletions token0/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import logging
import pathlib
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

from token0.api.v1.chat import router as chat_router
from token0.api.v1.estimate import router as estimate_router
Expand Down Expand Up @@ -45,6 +48,15 @@ async def lifespan(app: FastAPI):
app.include_router(usage_router, prefix="/v1")


# Directory of bundled static assets, resolved relative to this module's file.
_static = pathlib.Path(__file__).parent / "static"
# Expose that directory under the /static URL prefix.
# NOTE(review): presumably the directory must exist when this module is
# imported and must ship with the installed package — confirm against
# StaticFiles' directory check and the packaging configuration.
app.mount("/static", StaticFiles(directory=_static), name="static")


@app.get("/dashboard", response_class=HTMLResponse)
async def dashboard():
    """Serve the savings dashboard page.

    Returns:
        An ``HTMLResponse`` whose body is the text of ``dashboard.html`` read
        from the module's static directory.
    """
    # Decode explicitly as UTF-8: without an encoding argument, read_text()
    # uses the locale's preferred encoding, which differs between platforms.
    # Assumes dashboard.html is saved as UTF-8 — TODO confirm.
    page = (_static / "dashboard.html").read_text(encoding="utf-8")
    return HTMLResponse(page)


@app.get("/health")
async def health():
return {
Expand Down
19 changes: 19 additions & 0 deletions token0/optimization/message_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import logging

from token0.optimization.analyzer import analyze_image
from token0.optimization.prompt_classifier import extract_prompt_text
from token0.optimization.router import plan_optimization
from token0.optimization.saliency import apply_saliency_crop, detect_roi
from token0.optimization.transformer import transform_image

logger = logging.getLogger("token0.optimizer")
Expand All @@ -27,6 +29,7 @@ def optimize_messages(
total_after = 0
optimizations = []
recommended_model = None
prompt_text = extract_prompt_text(messages)

for msg in messages:
content = msg.get("content")
Expand Down Expand Up @@ -84,6 +87,22 @@ def optimize_messages(

try:
analysis, raw_bytes, pil_image = analyze_image(url)

# Saliency crop — trim to region the prompt asks about
saliency = detect_roi(prompt_text, pil_image)
if saliency.cropped:
pil_image = apply_saliency_crop(pil_image, saliency)
# Re-encode cropped image to bytes for downstream steps
import io as _io

fmt = "JPEG" if analysis.format == "jpg" else analysis.format.upper()
buf = _io.BytesIO()
pil_image.save(buf, format=fmt)
raw_bytes = buf.getvalue()
kw, pct = saliency.matched_keyword, saliency.savings_pct
optimizations.append(f"saliency crop ({kw!r}: {pct:.0%} pixels removed)")
logger.debug("token0: saliency crop on %r, savings=%.0f%%", kw, pct * 100)

plan = plan_optimization(
analysis,
model,
Expand Down
Loading
Loading