rlippmann · rlippmann · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/examples/integrations/README.md b/examples/integrations/README.md
@@ -4,7 +4,7 @@ These examples show how to use Context Compiler inside external app runtimes.
 
 ## LiteLLM (SDK)
 
-Minimal example showing how to run Context Compiler before sending a request to the LLM with LiteLLM.
+Minimal examples showing how to run Context Compiler before sending a request to the LLM with LiteLLM.
 
 Files:
 - Examples (basic + preprocessor): [litellm/README.md](litellm/README.md)
@@ -30,6 +30,7 @@ See the LiteLLM examples README for setup and usage:
 - If result is `clarify`, show the question and do not call the LLM.
 - If result is `passthrough`, send normal user input.
 - If result is `update`, use updated state and call the model with saved state in the prompt.
+- `response_format.py` shows a different boundary: saved compiler state changes the LiteLLM request shape instead of being reinjected into prompt text.
 
 ## LiteLLM Proxy
 

diff --git a/examples/integrations/litellm/README.md b/examples/integrations/litellm/README.md
@@ -1,8 +1,9 @@
 # LiteLLM examples
 
-This directory contains two small Context Compiler + LiteLLM integration examples:
+This directory contains three small Context Compiler + LiteLLM integration examples:
 
 - `basic.py`: compiler-only flow (no preprocessor)
+- `response_format.py`: host-side LiteLLM `response_format` selection from saved compiler state
 - `with_preprocessor.py`: heuristic-first preprocessor with optional LLM fallback before `engine.step(...)`
 
 ## Requirements
@@ -50,6 +51,21 @@ PY
 
 This near-miss input should return `clarify` instead of being rewritten.
 
+For host-side response shape selection:
+
+```shell
+pip install "context-compiler[integrations]"
+export OPENAI_API_KEY=...
+export MODEL=openai/gpt-4o-mini
+python - <<'PY'
+from context_compiler import create_engine
+from examples.integrations.litellm.response_format import plan_turn
+engine = create_engine()
+engine.step("use compact_summary")
+print(plan_turn("Summarize the release notes.", engine))
+PY
+```
+
 ## Environment configuration
 
 Required (normal `openai` mode):
@@ -122,6 +138,18 @@ Note: In these LiteLLM examples, `update` is rendered locally and does not call
 the downstream LLM. This makes state changes explicit. Production apps may
 choose different rendering behavior.
 
+## Response format example boundary
+
+`response_format.py` shows a different integration boundary from prompt reinjection:
+
+- Context Compiler owns authoritative state.
+- The host reads saved policy state and selects a LiteLLM `response_format` or omits it.
+- LiteLLM owns model invocation and provider behavior.
+- Context Compiler does not call LiteLLM on its own.
+- Context Compiler does not validate model output.
+- Context Compiler does not generate schemas dynamically.
+- This is application-layer use of authoritative state, not compiler semantics.
+
 ## Troubleshooting
 
 - `litellm is required`: install `context-compiler[integrations]` (or `context-compiler[experimental]` for preprocessor).
@@ -145,6 +173,11 @@ Decision flow in both examples:
 - `clarify`: show `prompt_to_user`; do not treat state as changed.
 - `update`: state changed; use updated state for the next model call.
 
+Decision flow in `response_format.py`:
+- `passthrough`: the host selects a `response_format` from saved state, or omits it when none applies.
+- `clarify`: show `prompt_to_user`; do not call LiteLLM.
+- `update`: state changed; the next host request may use a different `response_format`.
+
 ## Example checks
 
 - Near-miss passthrough (`with_preprocessor.py`):
@@ -158,3 +191,20 @@ Decision flow in both examples:
   - `use podman instead of docker` without prior `use docker` -> replacement clarify.
 - Directive-adjacent abstain (`with_preprocessor.py`):
   - `change premise concise replies` is classified as `unknown`, not rewritten, and handled by engine clarify.
+- Host-side request shaping (`response_format.py`):
+  - `use compact_summary` -> host selects compact-summary `response_format`.
+  - `use action_plan` -> host selects action-plan `response_format`.
+  - `prohibit compact_summary` -> host omits that `response_format`.
+
+## Optional smoke run for `response_format.py`
+
+```shell
+export RUN_LITELLM_SMOKE=1
+export PROVIDER=ollama
+export MODEL=ollama/qwen2.5:1.5b-instruct
+uv run python examples/integrations/litellm/response_format.py
+```
+
+For local Ollama smoke runs in this repo, `PROVIDER=ollama` is required. A
+`MODEL=ollama/...` value by itself still follows the default OpenAI provider
+path.
diff --git a/examples/integrations/litellm/response_format.py b/examples/integrations/litellm/response_format.py
@@ -0,0 +1,214 @@
+"""Minimal LiteLLM response_format selection from authoritative state.
+
+Flow:
+Context Compiler state -> host response_format decision -> LiteLLM model call.
+
+This example keeps model execution optional so tests can validate behavior
+without a live provider.
+"""
+
+import os
+from collections.abc import Callable, Mapping
+from importlib import import_module
+from typing import Any, TypedDict, cast
+
+from context_compiler import (
+    POLICY_PROHIBIT,
+    POLICY_USE,
+    State,
+    create_engine,
+    get_clarify_prompt,
+    get_decision_state,
+    get_policy_items,
+    is_clarify,
+)
+from context_compiler.engine import Engine
+
+try:
+    from host_support import print_startup_config, resolve_provider_config
+except ImportError:
+    from host_support.provider_mode import print_startup_config, resolve_provider_config
+
+COMPACT_SUMMARY_RESPONSE_FORMAT: dict[str, Any] = {  # requires a "summary" string
+    "type": "json_schema",
+    "json_schema": {
+        "name": "compact_summary",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "summary": {
+                    "type": "string",
+                    "description": "A compact summary of the answer.",
+                }
+            },
+            "required": ["summary"],
+            "additionalProperties": False,  # schema allows no keys besides "summary"
+        },
+    },
+}
+
+ACTION_PLAN_RESPONSE_FORMAT: dict[str, Any] = {  # requires "steps": a list of strings
+    "type": "json_schema",
+    "json_schema": {
+        "name": "action_plan",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "steps": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Ordered next steps for the user.",
+                }
+            },
+            "required": ["steps"],
+            "additionalProperties": False,  # schema allows no keys besides "steps"
+        },
+    },
+}
+
+_RESPONSE_FORMAT_BY_ITEM: dict[str, dict[str, Any]] = {  # policy item -> response_format
+    "compact_summary": COMPACT_SUMMARY_RESPONSE_FORMAT,  # first entry: checked first
+    "action_plan": ACTION_PLAN_RESPONSE_FORMAT,
+}
+
+
+class TurnPlan(TypedDict):  # what plan_turn returns for one user input
+    decision_kind: str  # "clarify", or str(decision["kind"]) otherwise
+    clarify_prompt: str | None  # set only for clarify decisions
+    selected_response_format_item: str | None  # policy item that picked the format
+    response_format: dict[str, Any] | None  # LiteLLM response_format, or None to omit
+
+
+class _LiteLLMCallKwargs(TypedDict, total=False):  # kwargs built in optional_litellm_call
+    model: str
+    messages: list[dict[str, str]]
+    temperature: float
+    api_base: str
+    api_key: str  # added only when config.api_key is truthy
+    response_format: dict[str, Any]  # added only when a format was selected
+
+
+def select_litellm_response_format(state: State) -> tuple[str | None, dict[str, Any] | None]:
+    """Return (policy_item, response_format) or (None, None) when no safe match exists."""
+
+    use_items = set(get_policy_items(state, POLICY_USE))
+    prohibit_items = set(get_policy_items(state, POLICY_PROHIBIT))
+
+    for item, response_format in _RESPONSE_FORMAT_BY_ITEM.items():  # dict order = priority
+        if item in use_items and item not in prohibit_items:  # prohibit overrides use
+            return item, response_format  # first match wins; returns the shared dict
+
+    return None, None
+
+
+def plan_turn(user_input: str, engine: Engine) -> TurnPlan:
+    """Run compiler step and decide whether to request LiteLLM structured output."""
+
+    decision = engine.step(user_input)
+    if is_clarify(decision):  # clarify: return the prompt and select no format
+        return {
+            "decision_kind": "clarify",
+            "clarify_prompt": get_clarify_prompt(decision),
+            "selected_response_format_item": None,
+            "response_format": None,
+        }
+
+    decision_state = get_decision_state(decision)  # may be None; engine.state is the fallback
+    compiled_state = decision_state if decision_state is not None else engine.state
+    selected_item, response_format = select_litellm_response_format(compiled_state)
+
+    return {
+        "decision_kind": str(decision["kind"]),
+        "clarify_prompt": None,
+        "selected_response_format_item": selected_item,
+        "response_format": response_format,
+    }
+
+
+def _get_litellm_completion() -> Callable[..., object]:  # lazy import: litellm stays optional
+    litellm_module = import_module("litellm")  # raises ModuleNotFoundError if not installed
+    return cast(Callable[..., object], litellm_module.completion)
+
+
+def _extract_response_content(response: object) -> str | None:  # choices[0].message.content
+    if isinstance(response, Mapping):  # mapping-shaped response: walk nested keys
+        choices = response.get("choices")
+        if isinstance(choices, list) and choices:
+            first = choices[0]
+            if isinstance(first, Mapping):
+                message = first.get("message")
+                if isinstance(message, Mapping):
+                    content = message.get("content")
+                    if isinstance(content, str):
+                        return content
+
+    choices_attr = getattr(response, "choices", None)  # attribute-shaped response
+    if isinstance(choices_attr, list) and choices_attr:
+        first = choices_attr[0]
+        message_attr = getattr(first, "message", None)
+        content_attr = getattr(message_attr, "content", None)
+        if isinstance(content_attr, str):
+            return content_attr
+
+    return None  # no string content found in either shape
+
+
+def optional_litellm_call(
+    *,
+    user_input: str,
+    response_format: Mapping[str, Any] | None,
+) -> str:
+    """Optional smoke call to LiteLLM; returns the reply text.
+
+    A non-None `response_format` is shallow-copied into the request; None omits the key.
+    Raises RuntimeError if litellm is not installed or the reply has no string content."""
+
+    try:
+        completion = _get_litellm_completion()
+    except ModuleNotFoundError as exc:
+        raise RuntimeError("litellm is required. Install with: pip install litellm") from exc
+
+    config = resolve_provider_config(default_model="openai/gpt-4o-mini")
+    print_startup_config(config)
+
+    kwargs: _LiteLLMCallKwargs = {
+        "model": config.model,
+        "messages": [{"role": "user", "content": user_input}],  # single user turn only
+        "temperature": 0,
+        "api_base": config.base_url,  # NOTE(review): always sent; confirm base_url is set
+    }
+    if config.api_key:
+        kwargs["api_key"] = config.api_key
+    if response_format is not None:
+        kwargs["response_format"] = dict(response_format)  # shallow copy of the mapping
+
+    response = completion(**kwargs)
+    content = _extract_response_content(response)
+    if content is None:
+        raise RuntimeError("LiteLLM response missing choices[0].message.content")
+    return content
+
+
+def main() -> None:  # demo: seed state, plan one turn, print the plan
+    engine = create_engine()
+
+    # Demonstration setup: issue one "use" and one "prohibit" directive before planning.
+    engine.step("use compact_summary")
+    engine.step("prohibit action_plan")
+
+    plan = plan_turn("Summarize what changed in this project.", engine)
+    print("decision_kind:", plan["decision_kind"])
+    print("selected_response_format_item:", plan["selected_response_format_item"])
+    print("response_format_selected:", plan["response_format"] is not None)
+
+    # Optional model execution path; runs only when RUN_LITELLM_SMOKE=1.
+    if os.getenv("RUN_LITELLM_SMOKE") == "1":
+        response = optional_litellm_call(
+            user_input="Summarize what changed in this project.",
+            response_format=plan["response_format"],
+        )
+        print("litellm_response:", response)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_example_integrations_imports.py b/tests/test_example_integrations_imports.py
@@ -83,6 +83,7 @@ def _guarded_import(
     ("module_path", "blocked_prefixes", "needs_openwebui_stubs"),
     [
         (INTEGRATIONS_DIR / "litellm" / "basic.py", ("litellm",), False),
+        (INTEGRATIONS_DIR / "litellm" / "response_format.py", ("litellm",), False),
         (INTEGRATIONS_DIR / "litellm" / "with_preprocessor.py", ("litellm",), False),
         (
             INTEGRATIONS_DIR / "ollama_structured_output" / "example.py",