From 80e5ffdcbaabfe081cdf609a0badfaab297dd590 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 03:27:59 -0400 Subject: [PATCH 1/2] feat: add litellm response_format example --- examples/integrations/README.md | 3 +- examples/integrations/litellm/README.md | 48 +++- .../integrations/litellm/response_format.py | 214 ++++++++++++++++++ tests/test_example_integrations_imports.py | 1 + tests/test_litellm_response_format_example.py | 112 +++++++++ 5 files changed, 376 insertions(+), 2 deletions(-) create mode 100644 examples/integrations/litellm/response_format.py create mode 100644 tests/test_litellm_response_format_example.py diff --git a/examples/integrations/README.md b/examples/integrations/README.md index 691ce44..b1834e8 100644 --- a/examples/integrations/README.md +++ b/examples/integrations/README.md @@ -4,7 +4,7 @@ These examples show how to use Context Compiler inside external app runtimes. ## LiteLLM (SDK) -Minimal example showing how to run Context Compiler before sending a request to the LLM with LiteLLM. +Minimal examples showing how to run Context Compiler before sending a request to the LLM with LiteLLM. Files: - Examples (basic + preprocessor): [litellm/README.md](litellm/README.md) @@ -30,6 +30,7 @@ See the LiteLLM examples README for setup and usage: - If result is `clarify`, show the question and do not call the LLM. - If result is `passthrough`, send normal user input. - If result is `update`, use updated state and call the model with saved state in the prompt. +- `response_format.py` shows a different boundary: saved compiler state changes the LiteLLM request shape instead of being reinjected into prompt text. ## LiteLLM Proxy diff --git a/examples/integrations/litellm/README.md b/examples/integrations/litellm/README.md index 22f0523..cd1bbeb 100644 --- a/examples/integrations/litellm/README.md +++ b/examples/integrations/litellm/README.md @@ -1,8 +1,9 @@ # LiteLLM examples -This directory contains two small Context Compiler + LiteLLM integration examples: +This directory contains three small Context Compiler + LiteLLM integration examples: - `basic.py`: compiler-only flow (no preprocessor) +- `response_format.py`: host-side LiteLLM `response_format` selection from saved compiler state - `with_preprocessor.py`: heuristic-first preprocessor with optional LLM fallback before `engine.step(...)` ## Requirements @@ -50,6 +51,21 @@ PY This near-miss input should return `clarify` instead of being rewritten. +For host-side response shape selection: + +```shell +pip install "context-compiler[integrations]" +export OPENAI_API_KEY=... +export MODEL=openai/gpt-4o-mini +python - <<'PY' +from context_compiler import create_engine +from examples.integrations.litellm.response_format import plan_turn +engine = create_engine() +engine.step("use compact_summary") +print(plan_turn("Summarize the release notes.", engine)) +PY +``` + ## Environment configuration Required (normal `openai` mode): @@ -122,6 +138,18 @@ Note: In these LiteLLM examples, `update` is rendered locally and does not call the downstream LLM. This makes state changes explicit. Production apps may choose different rendering behavior. +## Response format example boundary + +`response_format.py` shows a different integration boundary from prompt reinjection: + +- Context Compiler owns authoritative state. +- The host reads saved policy state and selects a LiteLLM `response_format` or omits it. +- LiteLLM owns model invocation and provider behavior. +- Context Compiler does not call LiteLLM on its own. +- Context Compiler does not validate model output. +- Context Compiler does not generate schemas dynamically. +- This is application-layer use of authoritative state, not compiler semantics. + ## Troubleshooting - `litellm is required`: install `context-compiler[integrations]` (or `context-compiler[experimental]` for preprocessor). @@ -145,6 +173,11 @@ Decision flow in both examples: - `clarify`: show `prompt_to_user`; do not treat state as changed. - `update`: state changed; use updated state for the next model call. +Decision flow in `response_format.py`: +- `passthrough`: let the host decide whether to send `response_format`. +- `clarify`: show `prompt_to_user`; do not call LiteLLM. +- `update`: state changed; the next host request may use a different `response_format`. + ## Example checks - Near-miss passthrough (`with_preprocessor.py`): @@ -158,3 +191,16 @@ Decision flow in both examples: - `use podman instead of docker` without prior `use docker` -> replacement clarify. - Directive-adjacent abstain (`with_preprocessor.py`): - `change premise concise replies` is classified as `unknown`, not rewritten, and handled by engine clarify. +- Host-side request shaping (`response_format.py`): + - `use compact_summary` -> host selects compact-summary `response_format`. + - `use action_plan` -> host selects action-plan `response_format`. + - `prohibit compact_summary` -> host omits that `response_format`. + +## Optional smoke run for `response_format.py` + +```shell +export RUN_LITELLM_SMOKE=1 +export OPENAI_API_KEY=... +export MODEL=openai/gpt-4o-mini +uv run python examples/integrations/litellm/response_format.py +``` diff --git a/examples/integrations/litellm/response_format.py b/examples/integrations/litellm/response_format.py new file mode 100644 index 0000000..f70ce59 --- /dev/null +++ b/examples/integrations/litellm/response_format.py @@ -0,0 +1,214 @@ +"""Minimal LiteLLM response_format selection from authoritative state. + +Flow: +Context Compiler state -> host response_format decision -> LiteLLM model call. + +This example keeps model execution optional so tests can validate behavior +without a live provider. +""" + +import os +from collections.abc import Callable, Mapping +from importlib import import_module +from typing import Any, TypedDict, cast + +from context_compiler import ( + POLICY_PROHIBIT, + POLICY_USE, + State, + create_engine, + get_clarify_prompt, + get_decision_state, + get_policy_items, + is_clarify, +) +from context_compiler.engine import Engine + +try: + from host_support import print_startup_config, resolve_provider_config +except ImportError: + from host_support.provider_mode import print_startup_config, resolve_provider_config + +COMPACT_SUMMARY_RESPONSE_FORMAT: dict[str, Any] = { + "type": "json_schema", + "json_schema": { + "name": "compact_summary", + "schema": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "A compact summary of the answer.", + } + }, + "required": ["summary"], + "additionalProperties": False, + }, + }, +} + +ACTION_PLAN_RESPONSE_FORMAT: dict[str, Any] = { + "type": "json_schema", + "json_schema": { + "name": "action_plan", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": {"type": "string"}, + "description": "Ordered next steps for the user.", + } + }, + "required": ["steps"], + "additionalProperties": False, + }, + }, +} + +_RESPONSE_FORMAT_BY_ITEM: dict[str, dict[str, Any]] = { + "compact_summary": COMPACT_SUMMARY_RESPONSE_FORMAT, + "action_plan": ACTION_PLAN_RESPONSE_FORMAT, +} + + +class TurnPlan(TypedDict): + decision_kind: str + clarify_prompt: str | None + selected_response_format_item: str | None + response_format: dict[str, Any] | None + + +class _LiteLLMCallKwargs(TypedDict, total=False): + model: str + messages: list[dict[str, str]] + temperature: float + api_base: str + api_key: str + response_format: dict[str, Any] + + +def select_litellm_response_format(state: State) -> tuple[str | None, dict[str, Any] | None]: + """Return (policy_item, response_format) or (None, None) when no safe match exists.""" + + use_items = set(get_policy_items(state, POLICY_USE)) + prohibit_items = set(get_policy_items(state, POLICY_PROHIBIT)) + + for item, response_format in _RESPONSE_FORMAT_BY_ITEM.items(): + if item in use_items and item not in prohibit_items: + return item, response_format + + return None, None + + +def plan_turn(user_input: str, engine: Engine) -> TurnPlan: + """Run compiler step and decide whether to request LiteLLM structured output.""" + + decision = engine.step(user_input) + if is_clarify(decision): + return { + "decision_kind": "clarify", + "clarify_prompt": get_clarify_prompt(decision), + "selected_response_format_item": None, + "response_format": None, + } + + decision_state = get_decision_state(decision) + compiled_state = decision_state if decision_state is not None else engine.state + selected_item, response_format = select_litellm_response_format(compiled_state) + + return { + "decision_kind": str(decision["kind"]), + "clarify_prompt": None, + "selected_response_format_item": selected_item, + "response_format": response_format, + } + + +def _get_litellm_completion() -> Callable[..., object]: + litellm_module = import_module("litellm") + return cast(Callable[..., object], litellm_module.completion) + + +def _extract_response_content(response: object) -> str | None: + if isinstance(response, Mapping): + choices = response.get("choices") + if isinstance(choices, list) and choices: + first = choices[0] + if isinstance(first, Mapping): + message = first.get("message") + if isinstance(message, Mapping): + content = message.get("content") + if isinstance(content, str): + return content + + choices_attr = getattr(response, "choices", None) + if isinstance(choices_attr, list) and choices_attr: + first = choices_attr[0] + message_attr = getattr(first, "message", None) + content_attr = getattr(message_attr, "content", None) + if isinstance(content_attr, str): + return content_attr + + return None + + +def optional_litellm_call( + *, + user_input: str, + response_format: Mapping[str, Any] | None, +) -> str: + """Optional smoke call to LiteLLM. + + If `response_format` is provided, it is passed through unchanged. + """ + + try: + completion = _get_litellm_completion() + except ModuleNotFoundError as exc: + raise RuntimeError("litellm is required. Install with: pip install litellm") from exc + + config = resolve_provider_config(default_model="openai/gpt-4o-mini") + print_startup_config(config) + + kwargs: _LiteLLMCallKwargs = { + "model": config.model, + "messages": [{"role": "user", "content": user_input}], + "temperature": 0, + "api_base": config.base_url, + } + if config.api_key: + kwargs["api_key"] = config.api_key + if response_format is not None: + kwargs["response_format"] = dict(response_format) + + response = completion(**kwargs) + content = _extract_response_content(response) + if content is None: + raise RuntimeError("LiteLLM response missing choices[0].message.content") + return content + + +def main() -> None: + engine = create_engine() + + # Demonstration setup. + engine.step("use compact_summary") + engine.step("prohibit action_plan") + + plan = plan_turn("Summarize what changed in this project.", engine) + print("decision_kind:", plan["decision_kind"]) + print("selected_response_format_item:", plan["selected_response_format_item"]) + print("response_format_selected:", plan["response_format"] is not None) + + # Optional model execution path; disabled by default. + if os.getenv("RUN_LITELLM_SMOKE") == "1": + response = optional_litellm_call( + user_input="Summarize what changed in this project.", + response_format=plan["response_format"], + ) + print("litellm_response:", response) + + +if __name__ == "__main__": + main() diff --git a/tests/test_example_integrations_imports.py b/tests/test_example_integrations_imports.py index 5612dde..fac21c3 100644 --- a/tests/test_example_integrations_imports.py +++ b/tests/test_example_integrations_imports.py @@ -83,6 +83,7 @@ def _guarded_import( ("module_path", "blocked_prefixes", "needs_openwebui_stubs"), [ (INTEGRATIONS_DIR / "litellm" / "basic.py", ("litellm",), False), + (INTEGRATIONS_DIR / "litellm" / "response_format.py", ("litellm",), False), (INTEGRATIONS_DIR / "litellm" / "with_preprocessor.py", ("litellm",), False), ( INTEGRATIONS_DIR / "ollama_structured_output" / "example.py", diff --git a/tests/test_litellm_response_format_example.py b/tests/test_litellm_response_format_example.py new file mode 100644 index 0000000..d217c7b --- /dev/null +++ b/tests/test_litellm_response_format_example.py @@ -0,0 +1,112 @@ +import importlib.util +from pathlib import Path +from typing import Any + +from context_compiler import create_engine + +REPO_ROOT = Path(__file__).resolve().parents[1] +EXAMPLE_PATH = REPO_ROOT / "examples" / "integrations" / "litellm" / "response_format.py" + + +def _load_module(): + spec = importlib.util.spec_from_file_location("litellm_response_format_example", EXAMPLE_PATH) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_no_supported_policy_selects_no_response_format() -> None: + module = _load_module() + engine = create_engine() + + plan = module.plan_turn("hello", engine) + assert plan["decision_kind"] == "passthrough" + assert plan["selected_response_format_item"] is None + assert plan["response_format"] is None + + +def test_use_compact_summary_selects_compact_summary_response_format() -> None: + module = _load_module() + engine = create_engine() + + assert engine.step("use compact_summary")["kind"] == "update" + + plan = module.plan_turn("summarize this", engine) + assert plan["selected_response_format_item"] == "compact_summary" + assert plan["response_format"] == module.COMPACT_SUMMARY_RESPONSE_FORMAT + + +def test_use_action_plan_selects_action_plan_response_format() -> None: + module = _load_module() + engine = create_engine() + + assert engine.step("use action_plan")["kind"] == "update" + + plan = module.plan_turn("what should i do next?", engine) + assert plan["selected_response_format_item"] == "action_plan" + assert plan["response_format"] == module.ACTION_PLAN_RESPONSE_FORMAT + + +def test_prohibit_compact_summary_omits_compact_summary_response_format() -> None: + module = _load_module() + engine = create_engine() + + assert engine.step("prohibit compact_summary")["kind"] == "update" + + plan = module.plan_turn("summarize this", engine) + assert plan["selected_response_format_item"] is None + assert plan["response_format"] is None + + +def test_contradiction_path_returns_clarify_and_skips_response_format() -> None: + module = _load_module() + engine = create_engine() + + assert engine.step("use compact_summary")["kind"] == "update" + + plan = module.plan_turn("prohibit compact_summary", engine) + assert plan["decision_kind"] == "clarify" + assert plan["clarify_prompt"] is not None + assert plan["selected_response_format_item"] is None + assert plan["response_format"] is None + + +def test_optional_litellm_call_includes_response_format_when_selected(monkeypatch) -> None: + module = _load_module() + seen: dict[str, object] = {} + + def _completion(**kwargs: Any) -> dict[str, object]: + seen.update(kwargs) + return {"choices": [{"message": {"content": "ok"}}]} + + monkeypatch.setenv("OPENAI_API_KEY", "dummy") + monkeypatch.setattr(module, "_get_litellm_completion", lambda: _completion) + + result = module.optional_litellm_call( + user_input="summarize this", + response_format=module.COMPACT_SUMMARY_RESPONSE_FORMAT, + ) + + assert result == "ok" + assert seen["response_format"] == module.COMPACT_SUMMARY_RESPONSE_FORMAT + + +def test_optional_litellm_call_omits_response_format_when_not_selected(monkeypatch) -> None: + module = _load_module() + seen: dict[str, object] = {} + + def _completion(**kwargs: Any) -> dict[str, object]: + seen.update(kwargs) + return {"choices": [{"message": {"content": "ok"}}]} + + monkeypatch.setenv("OPENAI_API_KEY", "dummy") + monkeypatch.setattr(module, "_get_litellm_completion", lambda: _completion) + + result = module.optional_litellm_call( + user_input="hello", + response_format=None, + ) + + assert result == "ok" + assert "response_format" not in seen From b4c0c82fd0dded256e805528fdc0456ceb797da6 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 03:34:08 -0400 Subject: [PATCH 2/2] docs: clarify ollama smoke command --- examples/integrations/litellm/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/integrations/litellm/README.md b/examples/integrations/litellm/README.md index cd1bbeb..c2ad3dd 100644 --- a/examples/integrations/litellm/README.md +++ b/examples/integrations/litellm/README.md @@ -200,7 +200,11 @@ Decision flow in `response_format.py`: ```shell export RUN_LITELLM_SMOKE=1 -export OPENAI_API_KEY=... -export MODEL=openai/gpt-4o-mini +export PROVIDER=ollama +export MODEL=ollama/qwen2.5:1.5b-instruct uv run python examples/integrations/litellm/response_format.py ``` + +For local Ollama smoke runs in this repo, `PROVIDER=ollama` is required. A +`MODEL=ollama/...` value by itself still follows the default OpenAI provider +path.