Pritom14 · Pritom14 · Apr 2, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 2, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+litellm_pr_draft.md
 __pycache__/
 *.py[cod]
 *$py.class

diff --git a/README.md b/README.md
@@ -30,7 +30,7 @@ Your App → Token0 Proxy → [Analyze → Classify → Route → Transform →
          Database (logs every optimization decision + savings)
 ```
 
-Token0 applies **10 optimizations** automatically:
+Token0 applies **11 optimizations** automatically:
 
 ### Core Optimizations (Free Tier)
 
@@ -56,6 +56,8 @@ Token0 applies **10 optimizations** automatically:
 
 **10. Video Optimization** — Automatically extract keyframes from video at 1fps, deduplicate similar consecutive frames using QJL perceptual hashing, detect scene changes via pixel-level diff, and run each keyframe through the full image optimization pipeline. A 60-second video at 30fps (1,800 frames) reduces to ~10 keyframes before being sent to the LLM. **13-45% savings on local models; ~83% projected savings on GPT-4.1.** Optional CLIP-based query-frame scoring (Layer 2) ranks frames by relevance to the user's prompt.
 
+**11. Saliency-Based ROI Cropping** — Detects which region of an image the prompt is asking about and crops to that region before sending to the LLM. "What's the total on this invoice?" → crops to the bottom 40% of the image. "Read the header" → crops to the top 25%. Rule-based spatial keyword matching (zero ML deps). Delivers ~60% additional pixel reduction on document and form images before any other optimization runs.
+
 ---
 
 ## Benchmarks
@@ -482,6 +484,10 @@ curl http://localhost:8000/v1/usage
 }
 ```
 
+### Savings Dashboard
+
+Open `http://localhost:8000/dashboard` in your browser for a live view of total requests, tokens saved, cost saved, and per-optimization breakdown. Auto-refreshes every 10 seconds.
+
 ### Run Benchmarks Yourself
 
 ```bash

diff --git a/test_token0_litellm.py b/test_token0_litellm.py
@@ -0,0 +1,141 @@
+"""Tests for the Token0 LiteLLM CustomLogger integration.
+
+These tests verify the Token0Hook contract without making real API calls.
+Token0 is installed separately: pip install token0
+"""
+
+import pytest
+from unittest.mock import patch
+
+
+def _make_image_message(url: str = "data:image/jpeg;base64,/9j/fake") -> dict:
+    return {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What's in this image?"},
+            {"type": "image_url", "image_url": {"url": url}},
+        ],
+    }
+
+
+# ---------------------------------------------------------------------------
+# Import guard — skip entire module if token0 is not installed
+# ---------------------------------------------------------------------------
+
+token0 = pytest.importorskip("token0", reason="token0 not installed")
+
+
+# ---------------------------------------------------------------------------
+# Hook contract tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_token0_hook_passthrough_for_non_completion():
+    """Hook must return data unchanged when call_type != 'completion'."""
+    from token0.litellm_hook import Token0Hook
+
+    hook = Token0Hook()
+    data = {"messages": [_make_image_message()], "model": "gpt-4o"}
+    result = await hook.async_pre_call_hook(
+        user_api_key_dict={}, cache=None, data=data, call_type="embedding"
+    )
+    assert result is data
+
+
+@pytest.mark.asyncio
+async def test_token0_hook_passthrough_for_empty_messages():
+    """Hook must return data unchanged when messages is empty."""
+    from token0.litellm_hook import Token0Hook
+
+    hook = Token0Hook()
+    data = {"messages": [], "model": "gpt-4o"}
+    result = await hook.async_pre_call_hook(
+        user_api_key_dict={}, cache=None, data=data, call_type="completion"
+    )
+    assert result is data
+
+
+@pytest.mark.asyncio
+async def test_token0_hook_text_only_passthrough():
+    """Text-only messages must pass through with zero overhead."""
+    from token0.litellm_hook import Token0Hook
+
+    hook = Token0Hook()
+    original_messages = [{"role": "user", "content": "Hello, what is 2+2?"}]
+    data = {"messages": original_messages, "model": "gpt-4o"}
+
+    result = await hook.async_pre_call_hook(
+        user_api_key_dict={}, cache=None, data=data, call_type="completion"
+    )
+
+    assert result["messages"] == original_messages
+
+
+@pytest.mark.asyncio
+async def test_token0_hook_attaches_stats_metadata():
+    """Hook must attach token0 stats to data['metadata']['token0']."""
+    from token0.litellm_hook import Token0Hook
+
+    hook = Token0Hook()
+    messages = [_make_image_message()]
+    data = {"messages": messages, "model": "gpt-4o"}
+
+    mock_stats = {
+        "tokens_before": 765,
+        "tokens_after": 85,
+        "tokens_saved": 680,
+        "optimizations": ["prompt-aware→low detail"],
+        "recommended_model": None,
+    }
+
+    with patch(
+        "token0.litellm_hook.optimize_messages",
+        return_value=(messages, mock_stats),
+    ):
+        result = await hook.async_pre_call_hook(
+            user_api_key_dict={}, cache=None, data=data, call_type="completion"
+        )
+
+    assert "metadata" in result
+    assert "token0" in result["metadata"]
+    assert result["metadata"]["token0"]["tokens_saved"] == 680
+
+
+@pytest.mark.asyncio
+async def test_token0_hook_remote_url_passthrough():
+    """Images with remote http/https URLs must not be modified."""
+    from token0.litellm_hook import Token0Hook
+
+    hook = Token0Hook()
+    remote_url = "https://example.com/photo.jpg"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Describe this"},
+                {"type": "image_url", "image_url": {"url": remote_url}},
+            ],
+        }
+    ]
+    data = {"messages": messages, "model": "gpt-4o"}
+
+    mock_stats = {
+        "tokens_before": 0,
+        "tokens_after": 0,
+        "tokens_saved": 0,
+        "optimizations": [],
+        "recommended_model": None,
+    }
+
+    with patch(
+        "token0.litellm_hook.optimize_messages",
+        return_value=(messages, mock_stats),
+    ):
+        result = await hook.async_pre_call_hook(
+            user_api_key_dict={}, cache=None, data=data, call_type="completion"
+        )
+
+    content = result["messages"][0]["content"]
+    image_parts = [p for p in content if p.get("type") == "image_url"]
+    assert image_parts[0]["image_url"]["url"] == remote_url
diff --git a/tests/test_saliency.py b/tests/test_saliency.py
@@ -0,0 +1,125 @@
+"""Tests for saliency-based ROI cropping."""
+
+from PIL import Image
+
+from token0.optimization.saliency import SaliencyResult, apply_saliency_crop, detect_roi
+
+
+def _make_image(w: int = 800, h: int = 1000) -> Image.Image:
+    return Image.new("RGB", (w, h), color=(200, 200, 200))
+
+
+# ---------------------------------------------------------------------------
+# detect_roi — keyword matching
+# ---------------------------------------------------------------------------
+
+
+def test_footer_keyword_crops_bottom():
+    img = _make_image()
+    result = detect_roi("What is the total amount on this invoice?", img)
+    assert result.cropped is True
+    assert result.matched_keyword is not None
+    # Bottom crop — top edge should be > 50% down
+    _, top, _, bottom = result.crop_box
+    assert top > img.height * 0.5
+    assert bottom == img.height
+
+
+def test_header_keyword_crops_top():
+    img = _make_image()
+    result = detect_roi("Read the header text", img)
+    assert result.cropped is True
+    left, top, right, bottom = result.crop_box
+    assert top == 0
+    assert bottom < img.height * 0.5
+
+
+def test_top_right_keyword():
+    img = _make_image()
+    result = detect_roi("What is the date on this document?", img)
+    assert result.cropped is True
+    left, top, right, bottom = result.crop_box
+    assert left > 0  # right half
+    assert top == 0
+
+
+def test_bottom_right_keyword():
+    img = _make_image()
+    result = detect_roi("What does the signature say at the bottom right?", img)
+    assert result.cropped is True
+    # "signature" matches footer rule (full-width bottom strip) — still a valid crop
+    _, top, _, bottom = result.crop_box
+    assert top > img.height * 0.5
+    assert bottom == img.height
+
+
+def test_no_match_returns_not_cropped():
+    img = _make_image()
+    result = detect_roi("Describe this image", img)
+    assert result.cropped is False
+    assert result.crop_box is None
+    assert result.savings_pct == 0.0
+
+
+def test_empty_prompt_returns_not_cropped():
+    img = _make_image()
+    result = detect_roi("", img)
+    assert result.cropped is False
+
+
+def test_tiny_image_skipped():
+    img = _make_image(100, 100)
+    result = detect_roi("What is the total?", img)
+    assert result.cropped is False
+
+
+def test_savings_pct_is_meaningful():
+    img = _make_image()
+    result = detect_roi("Read the header", img)
+    assert result.cropped is True
+    assert result.savings_pct >= 0.20
+
+
+# ---------------------------------------------------------------------------
+# apply_saliency_crop
+# ---------------------------------------------------------------------------
+
+
+def test_crop_produces_correct_dimensions():
+    img = _make_image(800, 1000)
+    result = detect_roi("What is the total?", img)
+    assert result.cropped
+    cropped = apply_saliency_crop(img, result)
+    left, top, right, bottom = result.crop_box
+    assert cropped.size == (right - left, bottom - top)
+
+
+def test_no_crop_returns_original():
+    img = _make_image()
+    result = SaliencyResult(cropped=False, crop_box=None, matched_keyword=None, savings_pct=0.0)
+    out = apply_saliency_crop(img, result)
+    assert out is img
+
+
+# ---------------------------------------------------------------------------
+# Integration: detect_roi → apply_saliency_crop produces smaller image
+# ---------------------------------------------------------------------------
+
+
+def test_cropped_image_is_smaller():
+    img = _make_image(800, 1000)
+    result = detect_roi("What is the invoice total?", img)
+    assert result.cropped
+    cropped = apply_saliency_crop(img, result)
+    orig_area = img.width * img.height
+    crop_area = cropped.width * cropped.height
+    assert crop_area < orig_area
+
+
+def test_center_keyword():
+    img = _make_image()
+    result = detect_roi("What is in the center of this image?", img)
+    assert result.cropped is True
+    left, top, right, bottom = result.crop_box
+    assert left > 0 and top > 0
+    assert right < img.width and bottom < img.height
diff --git a/token0/main.py b/token0/main.py
@@ -1,7 +1,10 @@
 import logging
+import pathlib
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
 
 from token0.api.v1.chat import router as chat_router
 from token0.api.v1.estimate import router as estimate_router
@@ -45,6 +48,15 @@ async def lifespan(app: FastAPI):
 app.include_router(usage_router, prefix="/v1")
 
 
+# Path of the "static" directory that sits next to this module.
+_static = pathlib.Path(__file__).parent / "static"
+# Serve that directory's files under the /static URL prefix.
+app.mount("/static", StaticFiles(directory=_static), name="static")
+
+
+@app.get("/dashboard", response_class=HTMLResponse)
+async def dashboard():
+    return HTMLResponse((_static / "dashboard.html").read_text())
+
+
 @app.get("/health")
 async def health():
     return {

diff --git a/token0/optimization/message_optimizer.py b/token0/optimization/message_optimizer.py
@@ -6,7 +6,9 @@
 import logging
 
 from token0.optimization.analyzer import analyze_image
+from token0.optimization.prompt_classifier import extract_prompt_text
 from token0.optimization.router import plan_optimization
+from token0.optimization.saliency import apply_saliency_crop, detect_roi
 from token0.optimization.transformer import transform_image
 
 logger = logging.getLogger("token0.optimizer")
@@ -27,6 +29,7 @@ def optimize_messages(
     total_after = 0
     optimizations = []
     recommended_model = None
+    prompt_text = extract_prompt_text(messages)
 
     for msg in messages:
         content = msg.get("content")
@@ -84,6 +87,22 @@ def optimize_messages(
 
             try:
                 analysis, raw_bytes, pil_image = analyze_image(url)
+
+                # Saliency crop — trim to region the prompt asks about
+                saliency = detect_roi(prompt_text, pil_image)
+                if saliency.cropped:
+                    pil_image = apply_saliency_crop(pil_image, saliency)
+                    # Re-encode cropped image to bytes for downstream steps
+                    import io as _io
+
+                    fmt = "JPEG" if analysis.format == "jpg" else analysis.format.upper()
+                    buf = _io.BytesIO()
+                    pil_image.save(buf, format=fmt)
+                    raw_bytes = buf.getvalue()
+                    kw, pct = saliency.matched_keyword, saliency.savings_pct
+                    optimizations.append(f"saliency crop ({kw!r}: {pct:.0%} pixels removed)")
+                    logger.debug("token0: saliency crop on %r, savings=%.0f%%", kw, pct * 100)
+
                 plan = plan_optimization(
                     analysis,
                     model,