From 969e2162202f0b826039776385283c88e46e8f38 Mon Sep 17 00:00:00 2001
From: Pritom Mazumdar <pritommazumdar@HiLabs-MacBook-Pro-222.local>
Date: Fri, 27 Mar 2026 17:45:47 +0530
Subject: [PATCH 1/3] added support for litellm

---
 README.md                  |  36 ++++++-
 tests/test_litellm_hook.py | 202 +++++++++++++++++++++++++++++++++++++
 token0/litellm_hook.py     | 177 ++++++++++++++++++++++++++++++++
 3 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_litellm_hook.py
 create mode 100644 token0/litellm_hook.py

diff --git a/README.md b/README.md
index 92c76a5..ff20a16 100644
--- a/README.md
+++ b/README.md
@@ -154,7 +154,7 @@ Using OpenAI's published token formulas on real images:
 
 ### Additional Test Coverage
 
-Token0 includes **93 unit tests** and benchmarks across multiple suites:
+Token0 includes **103 unit tests** and benchmarks across multiple suites:
 
 | Suite | Tests | What It Validates |
 |---|---|---|
@@ -165,6 +165,7 @@ Token0 includes **93 unit tests** and benchmarks across multiple suites:
 | `tasks` | 4 | Task types: classification, description, extraction, Q&A |
 | `real` | 5 | Real-world photos, receipts, invoices, screenshots |
 | `streaming` | 7 | SSE streaming: format, content, stats, image optimization |
+| `litellm` | 10 | LiteLLM hook: passthrough, optimization, OCR, cascade, async |
 
 ---
 
@@ -257,6 +258,39 @@ for chunk in stream:
 # Final chunk includes token0 optimization stats
 ```
 
+### Use With LiteLLM
+
+Already using [LiteLLM](https://github.com/BerriAI/litellm)? Add Token0 as a callback hook — no proxy needed:
+
+```python
+import litellm
+from token0.litellm_hook import Token0Hook
+
+litellm.callbacks = [Token0Hook()]
+
+# All your existing litellm calls now get image optimization for free
+response = litellm.completion(
+    model="gpt-4o",
+    messages=[{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What's in this image?"},
+            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
+        ]
+    }]
+)
+
+# Stats available in response metadata
+# response._hidden_params["metadata"]["token0"]["tokens_saved"]
+```
+
+Or in LiteLLM proxy `config.yaml`:
+
+```yaml
+litellm_settings:
+  callbacks: ["token0.litellm_hook.Token0Hook"]
+```
+
 ### Use With Ollama (free, fully local)
 
 ```bash
diff --git a/tests/test_litellm_hook.py b/tests/test_litellm_hook.py
new file mode 100644
index 0000000..9a85860
--- /dev/null
+++ b/tests/test_litellm_hook.py
@@ -0,0 +1,202 @@
+"""Tests for LiteLLM integration hook."""
+
+import pytest
+
+from tests.conftest import make_image, make_text_image
+from token0.litellm_hook import Token0Hook, _optimize_messages
+
+
+class TestOptimizeMessages:
+    """Unit tests for ``_optimize_messages`` on plain message dicts."""
+
+    def test_text_only_passthrough(self):
+        """Text-only messages pass through unchanged with zeroed stats."""
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "Hello"},
+        ]
+        optimized, stats = _optimize_messages(messages, "gpt-4o")
+
+        assert optimized == messages
+        assert stats["tokens_saved"] == 0
+        assert stats["optimizations"] == []
+
+    def test_large_image_gets_optimized(self):
+        """A large image yields at least one optimization and stays an image part."""
+        _, data_uri = make_image(4000, 3000, "blue", "JPEG")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Describe this"},
+                    {"type": "image_url", "image_url": {"url": data_uri}},
+                ],
+            }
+        ]
+        optimized, stats = _optimize_messages(messages, "gpt-4o")
+
+        assert len(stats["optimizations"]) > 0
+        # Image should still be present (resized, not OCR'd)
+        parts = optimized[0]["content"]
+        assert any(p.get("type") == "image_url" for p in parts)
+
+    def test_text_heavy_image_ocr_routed(self):
+        """A text-heavy image is replaced by a text part and saves tokens."""
+        _, data_uri = make_text_image(800, 600, lines=25, fmt="PNG")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Read this document"},
+                    {"type": "image_url", "image_url": {"url": data_uri}},
+                ],
+            }
+        ]
+        optimized, stats = _optimize_messages(messages, "gpt-4o")
+
+        assert stats["tokens_saved"] > 0
+        # OCR route replaces image with text
+        parts = optimized[0]["content"]
+        text_parts = [p for p in parts if p.get("type") == "text"]
+        assert len(text_parts) == 2  # original text + extracted text
+
+    def test_non_data_uri_passthrough(self):
+        """Remote URLs (not base64 data URIs) pass through unchanged."""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is this?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/image.jpg"},
+                    },
+                ],
+            }
+        ]
+        optimized, stats = _optimize_messages(messages, "gpt-4o")
+
+        assert optimized == messages
+        assert stats["tokens_saved"] == 0
+
+    def test_multiple_images(self):
+        """Two images in one message both remain image parts after optimization."""
+        _, uri1 = make_image(4000, 3000, "blue", "JPEG")
+        _, uri2 = make_image(3000, 2000, "red", "JPEG")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Compare these"},
+                    {"type": "image_url", "image_url": {"url": uri1}},
+                    {"type": "image_url", "image_url": {"url": uri2}},
+                ],
+            }
+        ]
+        optimized, stats = _optimize_messages(messages, "gpt-4o")
+
+        parts = optimized[0]["content"]
+        image_parts = [p for p in parts if p.get("type") == "image_url"]
+        assert len(image_parts) == 2
+
+    def test_cascade_recommends_cheaper_model(self):
+        """With cascade enabled, stats expose the ``recommended_model`` key."""
+        _, data_uri = make_image(800, 600, "red", "JPEG")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is this?"},
+                    {"type": "image_url", "image_url": {"url": data_uri}},
+                ],
+            }
+        ]
+        _, stats = _optimize_messages(messages, "gpt-4o", enable_cascade=True)
+        # Cascade may not trigger (depends on prompt classification); only check the key exists
+        assert "recommended_model" in stats
+
+    def test_stats_structure(self):
+        """Stats dict has all expected keys and a consistent ``tokens_saved``."""
+        _, data_uri = make_image(800, 600, "red", "JPEG")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Hello"},
+                    {"type": "image_url", "image_url": {"url": data_uri}},
+                ],
+            }
+        ]
+        _, stats = _optimize_messages(messages, "gpt-4o")
+
+        assert "tokens_before" in stats
+        assert "tokens_after" in stats
+        assert "tokens_saved" in stats
+        assert "optimizations" in stats
+        assert "recommended_model" in stats
+        assert stats["tokens_saved"] == stats["tokens_before"] - stats["tokens_after"]
+
+
+class TestToken0HookIntegration:
+    @pytest.mark.asyncio
+    async def test_hook_modifies_data(self):
+        """Hook returns the data dict with token0 stats attached under ``metadata``."""
+        _, data_uri = make_image(4000, 3000, "blue", "JPEG")
+        hook = Token0Hook()
+        data = {
+            "model": "gpt-4o",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Describe this"},
+                        {"type": "image_url", "image_url": {"url": data_uri}},
+                    ],
+                }
+            ],
+        }
+
+        result = await hook.async_pre_call_hook(
+            user_api_key_dict={},
+            cache=None,
+            data=data,
+            call_type="completion",
+        )
+
+        assert "metadata" in result
+        assert "token0" in result["metadata"]
+        assert result["metadata"]["token0"]["tokens_saved"] >= 0
+
+    @pytest.mark.asyncio
+    async def test_hook_skips_non_completion(self):
+        """Non-completion call types are returned as-is, without token0 metadata."""
+        hook = Token0Hook()
+        data = {"model": "dall-e-3", "prompt": "A cat"}
+
+        result = await hook.async_pre_call_hook(
+            user_api_key_dict={},
+            cache=None,
+            data=data,
+            call_type="image_generation",
+        )
+
+        assert result == data
+        assert "metadata" not in result
+
+    @pytest.mark.asyncio
+    async def test_hook_text_only_no_metadata_overhead(self):
+        """Text-only requests still get token0 metadata, reporting zero tokens saved."""
+        hook = Token0Hook()
+        data = {
+            "model": "gpt-4o",
+            "messages": [{"role": "user", "content": "Hello"}],
+        }
+
+        result = await hook.async_pre_call_hook(
+            user_api_key_dict={},
+            cache=None,
+            data=data,
+            call_type="completion",
+        )
+
+        assert result["metadata"]["token0"]["tokens_saved"] == 0
diff --git a/token0/litellm_hook.py b/token0/litellm_hook.py
new file mode 100644
index 0000000..5560f13
--- /dev/null
+++ b/token0/litellm_hook.py
@@ -0,0 +1,177 @@
+"""LiteLLM integration — Token0 as a pre-call hook.
+
+Usage in litellm proxy_config.yaml:
+
+    litellm_settings:
+      callbacks: ["token0.litellm_hook.Token0Hook"]
+
+Or programmatically:
+
+    import litellm
+    from token0.litellm_hook import Token0Hook
+    litellm.callbacks = [Token0Hook()]
+"""
+
+import logging
+
+from litellm.integrations.custom_logger import CustomLogger
+
+from token0.optimization.analyzer import analyze_image
+from token0.optimization.router import plan_optimization
+from token0.optimization.transformer import transform_image
+
+logger = logging.getLogger("token0.litellm")
+
+
+class Token0Hook(CustomLogger):
+    """LiteLLM hook that optimizes vision tokens before LLM calls."""
+
+    def __init__(self, enable_cascade: bool = False, detail_override: str | None = None):
+        super().__init__()  # let the CustomLogger base set up its own state
+        self.enable_cascade = enable_cascade
+        self.detail_override = detail_override
+
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict,
+        cache,
+        data: dict,
+        call_type: str,
+    ) -> dict:
+        """Optimize images in ``data["messages"]`` before the LLM call.
+
+        Mutates and returns ``data``; other call types and empty requests pass through.
+        """
+        # NOTE(review): chat requests may arrive as "acompletion" on some LiteLLM
+        # versions — both spellings are accepted; confirm against the target proxy.
+        if call_type not in ("completion", "acompletion"):
+            return data
+
+        messages = data.get("messages")
+        if not messages:
+            return data
+
+        model = data.get("model", "")
+        optimized_messages, stats = _optimize_messages(
+            messages,
+            model,
+            detail_override=self.detail_override,
+            enable_cascade=self.enable_cascade,
+        )
+        data["messages"] = optimized_messages
+
+        if stats["tokens_saved"] > 0:
+            reasons = ", ".join(stats["optimizations"])
+            logger.info("token0: %d tokens saved (%s)", stats["tokens_saved"], reasons)
+
+        # Attach stats for downstream logging/callbacks (tolerating ``metadata: None``)
+        metadata = data.get("metadata")
+        if metadata is None:
+            metadata = data["metadata"] = {}
+        metadata["token0"] = stats
+
+        # Apply model cascade if recommended
+        if stats.get("recommended_model"):
+            data["model"] = stats["recommended_model"]
+            logger.info("token0: cascade %s → %s", model, stats["recommended_model"])
+
+        return data
+
+
+def _optimize_messages(
+    messages: list[dict],
+    model: str,
+    detail_override: str | None = None,
+    enable_cascade: bool = False,
+) -> tuple[list[dict], dict]:
+    """Optimize images in a list of message dicts.
+
+    Only ``image_url`` parts carrying a ``data:`` URI are analyzed; any other message
+    or part is passed through as-is, as is an image whose optimization raises (it is
+    logged and left out of the stats). Rewritten messages are shallow copies, so keys
+    other than ``content`` survive and the caller's messages are not mutated.
+
+    Returns ``(optimized_messages, stats)``; ``stats`` holds ``tokens_before``,
+    ``tokens_after``, ``tokens_saved``, ``optimizations`` (plan reasons) and
+    ``recommended_model`` (first model recommended by a plan, else ``None``).
+    """
+    optimized = []
+    total_before = 0
+    total_after = 0
+    optimizations = []
+    recommended_model = None
+
+    for msg in messages:
+        content = msg.get("content")
+
+        # Text-only (str / None) or unrecognised content — pass through
+        if not isinstance(content, list):
+            optimized.append(msg)
+            continue
+
+        opt_parts = []
+        for part in content:
+            # Anything that is not an image part (including non-dict parts) is kept
+            if not isinstance(part, dict) or part.get("type") != "image_url":
+                opt_parts.append(part)
+                continue
+
+            # "image_url" may be a bare URL string instead of {"url": ...}
+            image_url = part.get("image_url") or {}
+            if isinstance(image_url, str):
+                image_url = {"url": image_url}
+            url = image_url.get("url") if isinstance(image_url, dict) else None
+
+            # Only optimize base64 data URIs
+            if not isinstance(url, str) or not url.startswith("data:"):
+                opt_parts.append(part)
+                continue
+
+            try:
+                analysis, raw_bytes, pil_image = analyze_image(url)
+                plan = plan_optimization(
+                    analysis,
+                    model,
+                    detail_override=detail_override,
+                    enable_cascade=enable_cascade,
+                )
+                if plan.use_ocr_route:
+                    result = transform_image(plan, analysis, raw_bytes, pil_image)
+                    text = f"[Extracted text from image]:\n{result['content']}"
+                    new_part = {"type": "text", "text": text}
+                elif plan.resize or plan.recompress_jpeg or plan.force_detail_low:
+                    result = transform_image(plan, analysis, raw_bytes, pil_image)
+                    detail = "low" if plan.force_detail_low else image_url.get("detail", "auto")
+                    new_part = {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{result['media_type']};base64,{result['base64']}",
+                            "detail": detail,
+                        },
+                    }
+                else:
+                    new_part = part
+
+                # Count the plan only after the transform succeeded, so a failure
+                # (handled below) never inflates the reported savings.
+                total_before += plan.estimated_tokens_before
+                total_after += plan.estimated_tokens_after
+                optimizations.extend(plan.reasons)
+                if plan.recommended_model and recommended_model is None:
+                    recommended_model = plan.recommended_model
+                opt_parts.append(new_part)
+            except Exception:
+                logger.warning("token0: failed to optimize image, passing through", exc_info=True)
+                opt_parts.append(part)
+
+        optimized.append({**msg, "content": opt_parts})
+
+    stats = {
+        "tokens_before": total_before,
+        "tokens_after": total_after,
+        "tokens_saved": total_before - total_after,
+        "optimizations": optimizations,
+        "recommended_model": recommended_model,
+    }
+
+    return optimized, stats

From 577186b2e3eb0604e7e0efbd1c96958d2ebfaafd Mon Sep 17 00:00:00 2001
From: Pritom Mazumdar <pritommazumdar@HiLabs-MacBook-Pro-222.local>
Date: Fri, 27 Mar 2026 17:48:21 +0530
Subject: [PATCH 2/3] bumped up version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 92bfdb4..ce17fdc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "token0"
-version = "0.2.0"
+version = "0.2.1"
 description = "Open-source API proxy that makes vision LLM calls 5-10x cheaper"
 readme = "README.md"
 license = "Apache-2.0"

From 03f9204eb8589638e4be0b45428bd08951e8c815 Mon Sep 17 00:00:00 2001
From: Pritom Mazumdar <pritommazumdar@HiLabs-MacBook-Pro-222.local>
Date: Fri, 27 Mar 2026 17:54:50 +0530
Subject: [PATCH 3/3] CI fixes

---
 tests/test_litellm_hook.py | 6 ++++--
 token0/litellm_hook.py     | 7 ++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/test_litellm_hook.py b/tests/test_litellm_hook.py
index 9a85860..c9076bc 100644
--- a/tests/test_litellm_hook.py
+++ b/tests/test_litellm_hook.py
@@ -2,8 +2,10 @@
 
 import pytest
 
-from tests.conftest import make_image, make_text_image
-from token0.litellm_hook import Token0Hook, _optimize_messages
+litellm = pytest.importorskip("litellm", reason="litellm not installed")
+
+from tests.conftest import make_image, make_text_image  # noqa: E402
+from token0.litellm_hook import Token0Hook, _optimize_messages  # noqa: E402
 
 
 class TestOptimizeMessages:
diff --git a/token0/litellm_hook.py b/token0/litellm_hook.py
index 5560f13..193c6a0 100644
--- a/token0/litellm_hook.py
+++ b/token0/litellm_hook.py
@@ -14,7 +14,12 @@
 
 import logging
 
-from litellm.integrations.custom_logger import CustomLogger
+try:
+    from litellm.integrations.custom_logger import CustomLogger
+except ImportError as err:  # re-raise with an install hint, keeping the original cause
+    raise ImportError(
+        "litellm is required for the Token0Hook integration. Install it with: pip install litellm"
+    ) from err
 
 from token0.optimization.analyzer import analyze_image
 from token0.optimization.router import plan_optimization