diff --git a/README.md b/README.md index 20d314d..e709ada 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Any reachable hostname/IP works — the connecting user just needs network acces ### 4. Pick an LLM provider (for the internal agent) -The agent talks to any LiteLLM-supported backend. **Recommended for new users: `deepinfra` with `google/gemma-4-31B-it`** — fast (5–30s per agent call), cheap, validated end-to-end on the wiki/maintainer/writer pipeline. `nim` is a free-tier fallback (occasionally flaky). The `vllm_*` profiles run a local model on your own GPU workstation — useful for offline / cost-free experiments, but require a running vLLM server reachable from the docker network (typically via SSH tunnel). +The agent talks to any LiteLLM-supported backend. **Recommended for new users: `deepinfra` with `google/gemma-4-31B-it`** — fast (5–30s per agent call), cheap, validated end-to-end on the wiki/maintainer/writer pipeline. `nim` is a free-tier fallback (occasionally flaky). The `vllm_*` profiles run a local model on your own GPU workstation — useful for offline / cost-free experiments, but require a running vLLM server reachable from the docker network (typically via SSH tunnel). Use `openai_compatible` for generic OpenAI-compatible `/v1` endpoints such as Ollama, copilot-api, or LM Studio. In `.env`: ``` @@ -81,9 +81,18 @@ DEEPINFRA_API_KEY=... # if profile=deepinfra — get from https://deepinf NVIDIA_NIM_API_KEY=... # if profile=nim — get from https://build.nvidia.com/ ``` +For an OpenAI-compatible local endpoint: + +``` +LLM_PROFILE=openai_compatible +AGENT_MODEL=openai/llama3.2:3b +AGENT_BASE_URL=http://host.docker.internal:11434/v1 +AGENT_API_KEY= # optional; set only if the endpoint requires auth +``` + Only the key matching your chosen profile needs to be filled. Leave the other blank or absent. 
-Adding a third provider (Together, OpenAI, local vLLM, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. +Adding another hosted provider (Together, OpenAI, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. ### 5. Create the Docker network, then bring the stack up @@ -180,18 +189,28 @@ The agent has 21 tools — every single BrainDB endpoint plus `delegate_to_subag - **`deepinfra` — recommended default.** Model `google/gemma-4-31B-it`. Fast (5–30s per agent call), cheap, validated end-to-end. - `nim` — NVIDIA NIM, model `google/gemma-4-31b-it`. Free tier, occasionally flaky. +- `openai_compatible` — any OpenAI-compatible `/v1` endpoint. Set `AGENT_MODEL=openai/<model>` (for example `openai/llama3.2:3b`) and `AGENT_BASE_URL`. - `vllm_workstation` / `vllm_workstation_qwen` / `vllm_workstation_gemma` — local vLLM running on your own GPU (advanced / offline; needs the server reachable from the docker network, usually via SSH tunnel). Each profile is a model-prefix + env-var pair; adding a new one is a dict entry. ``` -LLM_PROFILE=deepinfra # or nim / vllm_workstation / vllm_workstation_qwen +LLM_PROFILE=deepinfra # or nim / openai_compatible / vllm_workstation DEEPINFRA_API_KEY=... # required if profile=deepinfra (https://deepinfra.com/) NVIDIA_NIM_API_KEY=... # required if profile=nim (https://build.nvidia.com/) VLLM_API_KEY=... 
# optional, only if local vLLM is started with --api-key AGENT_MODEL= # optional: override the profile's default model ``` +For `openai_compatible`, `AGENT_MODEL` is required because BrainDB does not know which model your endpoint serves: + +``` +LLM_PROFILE=openai_compatible +AGENT_MODEL=openai/llama3.2:3b +AGENT_BASE_URL=http://host.docker.internal:11434/v1 +AGENT_API_KEY= +``` + **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call (entry args + exit elapsed/result) to stdout, visible via `docker logs braindb_api -f`. --- diff --git a/braindb/config.py b/braindb/config.py index 25acfcd..3078214 100644 --- a/braindb/config.py +++ b/braindb/config.py @@ -20,6 +20,10 @@ "model": "deepinfra/google/gemma-4-31B-it", "api_key_env": "DEEPINFRA_API_KEY", }, + "openai_compatible": { + "model": "", + "api_key_env": "AGENT_API_KEY", + }, "vllm_workstation": { "model": "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "api_key_env": "VLLM_API_KEY", @@ -105,6 +109,7 @@ class Settings(BaseSettings): # Agent (LiteLLM — provider selected via llm_profile) llm_profile: str = "deepinfra" agent_model: str = "" # blank = use profile's default model + agent_base_url: str = "" # Bumped 15 → 20 after live observation on Qwen 27B AWQ-INT4 (vLLM): # deep-research-style runs commonly need >15 tool turns to land # `final_answer`. 20 gives breathing room; finishes-fast providers @@ -176,13 +181,13 @@ def resolved_api_key(self) -> str: key = os.getenv(profile["api_key_env"], "") # Self-hosted profiles (vLLM/Ollama) may run without auth, but the # OpenAI client still needs a non-empty key — supply a placeholder. 
- if not key and profile.get("base_url"): + if not key and self.resolved_base_url: return "EMPTY" return key @property def resolved_base_url(self) -> str | None: - return _LLM_PROFILES[self.llm_profile].get("base_url") + return self.agent_base_url or _LLM_PROFILES[self.llm_profile].get("base_url") settings = Settings() diff --git a/docker-compose.yml b/docker-compose.yml index da218f6..03d6ea1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,8 @@ services: HF_TOKEN: ${HF_TOKEN:-} LLM_PROFILE: ${LLM_PROFILE:-deepinfra} AGENT_MODEL: ${AGENT_MODEL:-} + AGENT_BASE_URL: ${AGENT_BASE_URL:-} + AGENT_API_KEY: ${AGENT_API_KEY:-} NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY:-} DEEPINFRA_API_KEY: ${DEEPINFRA_API_KEY:-} VLLM_API_KEY: ${VLLM_API_KEY:-} diff --git a/pyproject.toml b/pyproject.toml index cb01094..b027e9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,3 +61,6 @@ packages = ["braindb"] testpaths = ["tests"] timeout = 60 addopts = "-ra" +markers = [ + "unit: tests that do not require a live BrainDB stack", +] diff --git a/tests/conftest.py b/tests/conftest.py index 8f573d9..537d85f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,8 +42,9 @@ def _wait_for_health(url: str, timeout: int = 30) -> bool: @pytest.fixture(scope="session", autouse=True) -def _require_live_api() -> None: - """Fail fast and loud if the stack isn't up — tests have nothing to run against.""" +def _require_live_api(request: pytest.FixtureRequest) -> None: + if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items): + return if not _wait_for_health(API_URL): pytest.fail( f"BrainDB API not healthy at {API_URL}. 
" @@ -52,7 +53,7 @@ def _require_live_api() -> None: @pytest.fixture(scope="session", autouse=True) -def _purge_pytest_artefacts_at_session_end() -> Iterator[None]: +def _purge_pytest_artefacts_at_session_end(request: pytest.FixtureRequest) -> Iterator[None]: """Session teardown safety net for the per-test `created_entities` fixture: any test that errors before registering its IDs (or that bypasses the factories entirely) still leaks `_pytest_` rows @@ -68,6 +69,8 @@ def _purge_pytest_artefacts_at_session_end() -> Iterator[None]: entities themselves. """ yield + if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items): + return try: from braindb.db import get_conn # only imported at teardown except Exception as exc: # noqa: BLE001 — defensive, never block the session diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py new file mode 100644 index 0000000..313fb51 --- /dev/null +++ b/tests/test_config_profiles.py @@ -0,0 +1,54 @@ +import pytest + +from braindb.config import Settings + + +pytestmark = pytest.mark.unit + + +def test_openai_compatible_profile_resolves_env_values(monkeypatch): + monkeypatch.setenv("AGENT_MODEL", "openai/gpt-5-mini") + monkeypatch.setenv("AGENT_BASE_URL", "http://localhost:4141/v1") + monkeypatch.setenv("AGENT_API_KEY", "test-key") + + settings = Settings(_env_file=None, llm_profile="openai_compatible") + + assert settings.resolved_agent_model == "openai/gpt-5-mini" + assert settings.resolved_base_url == "http://localhost:4141/v1" + assert settings.resolved_api_key == "test-key" + + +def test_openai_compatible_profile_allows_empty_key_for_local_endpoint(monkeypatch): + monkeypatch.setenv("AGENT_MODEL", "openai/llama3.2:3b") + monkeypatch.setenv("AGENT_BASE_URL", "http://localhost:11434/v1") + monkeypatch.delenv("AGENT_API_KEY", raising=False) + + settings = Settings(_env_file=None, llm_profile="openai_compatible") + + assert settings.resolved_agent_model == 
"openai/llama3.2:3b" + assert settings.resolved_base_url == "http://localhost:11434/v1" + assert settings.resolved_api_key == "EMPTY" + + +@pytest.mark.parametrize( + ("profile", "expected_model", "expected_base_url"), + [ + ("deepinfra", "deepinfra/google/gemma-4-31B-it", None), + ("nim", "nvidia_nim/google/gemma-4-31b-it", None), + ("vllm_workstation", "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "http://host.docker.internal:8002/v1"), + ("vllm_workstation_qwen", "openai/cyankiwi/Qwen3.6-27B-AWQ-INT4", "http://host.docker.internal:8010/v1"), + ("vllm_workstation_gemma", "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "http://host.docker.internal:8009/v1"), + ], +) +def test_existing_profiles_keep_current_resolution(monkeypatch, profile, expected_model, expected_base_url): + monkeypatch.delenv("AGENT_MODEL", raising=False) + monkeypatch.delenv("AGENT_BASE_URL", raising=False) + monkeypatch.delenv("DEEPINFRA_API_KEY", raising=False) + monkeypatch.delenv("NVIDIA_NIM_API_KEY", raising=False) + monkeypatch.delenv("VLLM_API_KEY", raising=False) + + settings = Settings(_env_file=None, llm_profile=profile) + + assert settings.resolved_agent_model == expected_model + assert settings.resolved_base_url == expected_base_url + assert settings.resolved_api_key == ("EMPTY" if expected_base_url else "")