Pritom14 · Pritom14 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/README.md b/README.md
@@ -449,6 +449,35 @@ response = llm.invoke([HumanMessage(content=[
 
 Works with any LangChain chat model — ChatOpenAI, ChatAnthropic, ChatGoogleGenerativeAI, etc.
 
+### Use With Instructor
+
+Already using [Instructor](https://github.com/jxnl/instructor) for structured outputs? Add Token0 as a pre-call hook:
+
+```python
+import instructor
+import openai
+from token0.instructor_hook import Token0Hook
+
+client = instructor.from_openai(openai.OpenAI())
+client.on("completion:kwargs", Token0Hook())
+
+# All calls now get image optimization automatically
+response = client.chat.completions.create(
+    model="gpt-4.1",
+    messages=[{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What is the total on this invoice?"},
+            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
+        ]
+    }],
+    response_model=MyModel,
+)
+# Invoice image cropped to bottom 40% (saliency) + OCR routed — ~90% token savings
+```
+
+Works with any Instructor-supported provider — OpenAI, Anthropic, Google, Ollama.
+
 ### Use With Ollama (free, fully local)
 
 ```bash

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "token0"
-version = "0.3.1"
+version = "0.3.2"
 description = "Open-source API proxy that makes vision LLM calls 5-10x cheaper"
 readme = "README.md"
 license = "Apache-2.0"

diff --git a/tests/test_instructor_hook.py b/tests/test_instructor_hook.py
@@ -0,0 +1,189 @@
+"""Tests for the instructor integration hook.
+
+All tests are mock-only — no real LLM calls, no instructor dependency required.
+"""
+
+from unittest.mock import patch
+
+from token0.instructor_hook import Token0Hook
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+# Stats returned by the (mocked) optimizer when nothing was saved.
+_MOCK_STATS_NO_SAVINGS = dict(
+    tokens_before=100,
+    tokens_after=100,
+    tokens_saved=0,
+    optimizations=[],
+    recommended_model=None,
+)
+
+# Stats for a call that saved tokens without recommending another model.
+_MOCK_STATS_WITH_SAVINGS = dict(
+    tokens_before=765,
+    tokens_after=85,
+    tokens_saved=680,
+    optimizations=["prompt-aware -> low detail (simple task)"],
+    recommended_model=None,
+)
+
+# Same token numbers, but with a cheaper model recommended.
+_MOCK_STATS_CASCADE = {
+    **_MOCK_STATS_WITH_SAVINGS,
+    "optimizations": ["cascade -> gpt-4o-mini"],
+    "recommended_model": "gpt-4o-mini",
+}
+
+# A single user turn carrying one text part and one inline base64 image part.
+_IMAGE_MESSAGE = [
+    dict(
+        role="user",
+        content=[
+            dict(type="text", text="Describe this image"),
+            dict(type="image_url", image_url=dict(url="data:image/jpeg;base64,abc123")),
+        ],
+    )
+]
+
+# A single user turn with plain string content (no image parts).
+_TEXT_MESSAGE = [dict(role="user", content="Hello")]
+
+
+# ---------------------------------------------------------------------------
+# Basic behaviour
+# ---------------------------------------------------------------------------
+
+
+def test_hook_is_callable():
+    """A Token0Hook instance can be invoked like a function."""
+    assert callable(Token0Hook())
+
+
+def test_empty_messages_passthrough():
+    """An empty "messages" list is handed back unchanged."""
+    request = {"model": "gpt-4o", "messages": []}
+    outcome = Token0Hook()(request)
+    assert outcome == {"model": "gpt-4o", "messages": []}
+
+
+def test_missing_messages_passthrough():
+    """A request without a "messages" key is handed back unchanged."""
+    request = {"model": "gpt-4o"}
+    outcome = Token0Hook()(request)
+    assert outcome == {"model": "gpt-4o"}
+
+
+def test_text_only_passthrough():
+    """Text-only messages come back as the (mocked) optimizer returned them."""
+    optimizer_result = (_TEXT_MESSAGE, _MOCK_STATS_NO_SAVINGS)
+    with patch("token0.instructor_hook.optimize_messages", return_value=optimizer_result):
+        outcome = Token0Hook()({"model": "gpt-4o", "messages": _TEXT_MESSAGE})
+    assert outcome["messages"] == _TEXT_MESSAGE
+
+
+# ---------------------------------------------------------------------------
+# Image optimization
+# ---------------------------------------------------------------------------
+
+
+def test_image_messages_are_optimized():
+    """Image messages are replaced by whatever optimize_messages returns."""
+    rewritten = [{"role": "user", "content": [{"type": "text", "text": "[Extracted text]"}]}]
+    request = {"model": "gpt-4o", "messages": _IMAGE_MESSAGE}
+    target = "token0.instructor_hook.optimize_messages"
+    with patch(target, return_value=(rewritten, _MOCK_STATS_WITH_SAVINGS)) as optimizer:
+        outcome = Token0Hook()(request)
+
+    assert outcome["messages"] == rewritten
+    # Default hook settings must be forwarded verbatim to the optimizer.
+    optimizer.assert_called_once_with(
+        _IMAGE_MESSAGE, "gpt-4o", detail_override=None, enable_cascade=False
+    )
+
+
+def test_detail_override_passed_through():
+    """The hook's detail_override setting is forwarded to optimize_messages."""
+    hook = Token0Hook(detail_override="low")
+    with patch(
+        "token0.instructor_hook.optimize_messages",
+        return_value=(_IMAGE_MESSAGE, _MOCK_STATS_NO_SAVINGS),
+    ) as mock_opt:
+        hook({"model": "gpt-4o", "messages": _IMAGE_MESSAGE})
+
+    # Accept keyword or positional forwarding, but guard the positional
+    # fallback so a missing argument produces a plain assertion failure
+    # rather than an IndexError.
+    args, kwargs = mock_opt.call_args
+    detail_override = kwargs.get("detail_override", args[2] if len(args) > 2 else None)
+    assert detail_override == "low"
+
+
+def test_enable_cascade_passed_through():
+    """enable_cascade=True on the hook reaches optimize_messages."""
+    target = "token0.instructor_hook.optimize_messages"
+    hook = Token0Hook(enable_cascade=True)
+    with patch(target, return_value=(_IMAGE_MESSAGE, _MOCK_STATS_NO_SAVINGS)) as optimizer:
+        hook({"model": "gpt-4o", "messages": _IMAGE_MESSAGE})
+
+    # Look for the flag as a keyword first, then as the fourth positional arg.
+    positional, keyword = optimizer.call_args
+    if "enable_cascade" in keyword:
+        forwarded = keyword["enable_cascade"]
+    elif len(positional) > 3:
+        forwarded = positional[3]
+    else:
+        forwarded = False
+    assert forwarded is True
+
+
+# ---------------------------------------------------------------------------
+# Model cascade
+# ---------------------------------------------------------------------------
+
+
+def test_cascade_updates_model():
+    """A recommended model in the stats replaces the requested model."""
+    cascade_result = (_IMAGE_MESSAGE, _MOCK_STATS_CASCADE)
+    hook = Token0Hook(enable_cascade=True)
+    with patch("token0.instructor_hook.optimize_messages", return_value=cascade_result):
+        outcome = hook({"model": "gpt-4o", "messages": _IMAGE_MESSAGE})
+
+    assert outcome["model"] == "gpt-4o-mini"
+
+
+def test_no_cascade_leaves_model_unchanged():
+    """Without a recommended model the requested model is kept."""
+    savings_result = (_IMAGE_MESSAGE, _MOCK_STATS_WITH_SAVINGS)
+    request = {"model": "gpt-4o", "messages": _IMAGE_MESSAGE}
+    with patch("token0.instructor_hook.optimize_messages", return_value=savings_result):
+        outcome = Token0Hook()(request)
+
+    assert outcome["model"] == "gpt-4o"
+
+
+# ---------------------------------------------------------------------------
+# kwargs passthrough
+# ---------------------------------------------------------------------------
+
+
+def test_extra_kwargs_preserved():
+    """The temperature and max_tokens entries survive the hook untouched."""
+    request = {
+        "model": "gpt-4o",
+        "messages": _IMAGE_MESSAGE,
+        "temperature": 0.7,
+        "max_tokens": 512,
+    }
+    passthrough = (_IMAGE_MESSAGE, _MOCK_STATS_NO_SAVINGS)
+    with patch("token0.instructor_hook.optimize_messages", return_value=passthrough):
+        outcome = Token0Hook()(request)
+
+    assert (outcome["temperature"], outcome["max_tokens"]) == (0.7, 512)
+
+
+def test_no_model_key_still_works():
+    """Without a "model" key the hook still optimizes, passing "" as the model."""
+    hook = Token0Hook()
+    with patch(
+        "token0.instructor_hook.optimize_messages",
+        return_value=(_IMAGE_MESSAGE, _MOCK_STATS_NO_SAVINGS),
+    ) as mock_opt:
+        result = hook({"messages": _IMAGE_MESSAGE})
+
+    # Checking only that "messages" is present would pass even if the hook
+    # did nothing, so pin the optimizer call and the resulting payload.
+    mock_opt.assert_called_once_with(
+        _IMAGE_MESSAGE, "", detail_override=None, enable_cascade=False
+    )
+    assert result["messages"] == _IMAGE_MESSAGE
+    # No model was recommended, so the hook must not invent a "model" key.
+    assert "model" not in result
diff --git a/token0/instructor_hook.py b/token0/instructor_hook.py
@@ -0,0 +1,86 @@
+"""Instructor integration — Token0 as a pre-call hook.
+
+Hooks into instructor's COMPLETION_KWARGS event to optimize vision tokens
+before every LLM call. Works with any instructor-supported provider.
+
+Usage:
+    import instructor
+    import openai
+    from token0.instructor_hook import Token0Hook
+
+    client = instructor.from_openai(openai.OpenAI())
+    hook = Token0Hook()
+    client.on("completion:kwargs", hook)
+
+    # All calls now get image optimization automatically
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What is the total on this invoice?"},
+                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
+            ]
+        }],
+        response_model=MyModel,
+    )
+
+Works with any instructor provider — OpenAI, Anthropic, Google, Ollama, etc.
+No proxy required — runs as an in-process pre-call hook.
+"""
+
+import logging
+from typing import Any
+
+from token0.optimization.message_optimizer import optimize_messages
+
+logger = logging.getLogger("token0.instructor")
+
+
+class Token0Hook:
+    """Instructor pre-call hook that optimizes vision tokens before LLM calls.
+
+    Attach to any instructor client via client.on("completion:kwargs", Token0Hook()).
+
+    Two calling conventions are accepted:
+
+    * ``hook(**completion_kwargs)`` -- the form instructor uses when it emits
+      "completion:kwargs". Instructor discards the handler's return value, so
+      the optimized messages are written into the caller's ``messages`` list
+      in place. A cascade recommendation cannot be applied this way; it is
+      logged and reflected only in the returned dict.
+    * ``hook(kwargs_dict)`` -- direct use. The dict is updated and returned.
+
+    NOTE(review): the keyword form follows instructor's documented
+    ``handler(*args, **kwargs)`` emission -- confirm against the pinned version.
+
+    Args:
+        enable_cascade: Auto-route simple tasks to cheaper models (default: False).
+        detail_override: Force "low" or "high" detail mode for OpenAI (default: auto).
+    """
+
+    def __init__(
+        self,
+        enable_cascade: bool = False,
+        detail_override: str | None = None,
+    ):
+        self.enable_cascade = enable_cascade
+        self.detail_override = detail_override
+
+    def __call__(
+        self,
+        kwargs: dict[str, Any] | None = None,
+        **completion_kwargs: Any,
+    ) -> dict[str, Any]:
+        """Optimize images in the request's "messages" before the LLM call.
+
+        Args:
+            kwargs: Completion kwargs as a single dict (direct use). The dict
+                is updated in place and returned.
+            **completion_kwargs: Completion kwargs passed as keywords. Used
+                only when ``kwargs`` is not given.
+
+        Returns:
+            The completion kwargs with "messages" replaced by the optimizer's
+            output and "model" replaced by the recommended model, if any.
+            Requests with no (or empty) "messages" are returned untouched.
+        """
+        # A keyword-style call means the return value may be ignored by the
+        # caller, so the only way to affect the request is to mutate objects
+        # it shares with us (the messages list).
+        keyword_call = kwargs is None
+        if keyword_call:
+            kwargs = completion_kwargs
+
+        messages = kwargs.get("messages")
+        if not messages:
+            return kwargs
+
+        model = kwargs.get("model", "")
+        optimized_messages, stats = optimize_messages(
+            messages,
+            model,
+            detail_override=self.detail_override,
+            enable_cascade=self.enable_cascade,
+        )
+
+        if keyword_call and isinstance(messages, list):
+            # Slice-assign so the caller's own list object carries the result.
+            messages[:] = optimized_messages
+            kwargs["messages"] = messages
+        else:
+            kwargs["messages"] = optimized_messages
+
+        if stats["tokens_saved"] > 0:
+            logger.info(
+                "token0: %d tokens saved (%s)",
+                stats["tokens_saved"],
+                ", ".join(stats["optimizations"]),
+            )
+
+        # Cascade: switch to cheaper model if recommended
+        recommended_model = stats.get("recommended_model")
+        if recommended_model:
+            if keyword_call:
+                # "model" is a plain string in our private copy of the kwargs;
+                # rebinding it here cannot reach a caller that drops the result.
+                logger.warning(
+                    "token0: cascade %s -> %s recommended; only the returned kwargs carry it",
+                    model,
+                    recommended_model,
+                )
+            else:
+                logger.info("token0: cascade %s -> %s", model, recommended_model)
+            kwargs["model"] = recommended_model
+
+        return kwargs