From 44179a40d6e081094abe428448f3a7de4079764a Mon Sep 17 00:00:00 2001 From: Giorgi Duchidze <124621555+gduchidze@users.noreply.github.com> Date: Sun, 24 May 2026 06:23:18 +0000 Subject: [PATCH] Add support for GCP models (Vertex AI and Gemini) in README and codebase --- README.md | 28 ++++++++++++++++++++++++++ agent/core/agent_loop.py | 6 +++++- agent/core/llm_params.py | 26 ++++++++++++++++++++++++ agent/core/model_switcher.py | 11 ++++++++++- agent/main.py | 6 ++++++ tests/unit/test_llm_params.py | 37 +++++++++++++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 849aa326..cfd7532a 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,10 @@ Create a `.env` file in the project root (or export these in your shell): ```bash ANTHROPIC_API_KEY= # if using anthropic models OPENAI_API_KEY= # if using openai models +GEMINI_API_KEY= # if using gemini/ (Google AI Studio) models +VERTEXAI_PROJECT= # if using vertex_ai/ models +VERTEXAI_LOCATION=us-central1 # GCP region for vertex_ai/ models +GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json # ADC for vertex_ai/ LOCAL_LLM_BASE_URL=http://localhost:8000 # shared fallback for local model prefixes LOCAL_LLM_API_KEY= # optional shared local API key HF_TOKEN= @@ -61,6 +65,9 @@ ml-intern "fine-tune llama on my dataset" ```bash ml-intern --model anthropic/claude-opus-4-7 "your prompt" # requires ANTHROPIC_API_KEY ml-intern --model openai/gpt-5.5 "your prompt" # requires OPENAI_API_KEY +ml-intern --model vertex_ai/gemini-3.1-pro "your prompt" # GCP Vertex AI (ADC + VERTEXAI_*) +ml-intern --model vertex_ai/gemini-3.5-flash "your prompt" # GCP Vertex AI +ml-intern --model gemini/gemini-3.5-flash "your prompt" # Google AI Studio (GEMINI_API_KEY) ml-intern --model ollama/llama3.1:8b "your prompt" ml-intern --model vllm/meta-llama/Llama-3.1-8B-Instruct "your prompt" ml-intern --sandbox-tools "your prompt" # use HF Space sandbox tools @@ -98,6 +105,27 @@ one 
shared local endpoint, or override a specific provider with its matching `VLLM_API_KEY`. Provider-specific variables take precedence over the shared local variables. Base URLs may include or omit `/v1`. +**GCP models (Vertex AI / Gemini):** + +Google Cloud models route through LiteLLM; no extra dependency is needed: + +```text +/model vertex_ai/gemini-3.1-pro # Gemini 3.1 Pro on Vertex AI +/model vertex_ai/gemini-3.5-flash # Gemini 3.5 Flash on Vertex AI +/model vertex_ai/claude-opus-4-6 # Anthropic models via Vertex Model Garden +/model gemini/gemini-3.5-flash # Google AI Studio (simpler, key-only) +``` + +- `vertex_ai/` — full Vertex AI. Set `VERTEXAI_PROJECT` and + `VERTEXAI_LOCATION`, and authenticate with Application Default Credentials: + either set `GOOGLE_APPLICATION_CREDENTIALS` to a service-account JSON path, + or run `gcloud auth application-default login`. Covers Gemini and the + Anthropic models published in Vertex Model Garden. +- `gemini/` — Google AI Studio. Only needs `GEMINI_API_KEY`. + +Reasoning effort is not forwarded to these providers (each has its own +thinking shape); calls run without an effort level. + **CLI tool runtime:** By default, the CLI runs `bash`, `read`, `write`, and `edit` on your local diff --git a/agent/core/agent_loop.py b/agent/core/agent_loop.py index 0f84351f..867c10dd 100644 --- a/agent/core/agent_loop.py +++ b/agent/core/agent_loop.py @@ -558,7 +558,11 @@ def _friendly_error_message(error: Exception) -> str | None: "To fix this, set the API key for your model provider:\n" " • Anthropic: export ANTHROPIC_API_KEY=sk-...\n" " • OpenAI: export OPENAI_API_KEY=sk-...\n" - " • HF Router: export HF_TOKEN=hf_...\n\n" + " • HF Router: export HF_TOKEN=hf_...\n" + " • Gemini (AI Studio): export GEMINI_API_KEY=...\n" + " • Vertex AI: export VERTEXAI_PROJECT=... 
VERTEXAI_LOCATION=...\n" + " then: gcloud auth application-default login\n" + " (or export GOOGLE_APPLICATION_CREDENTIALS=/path/sa.json)\n\n" "You can also add it to a .env file in the project root.\n" "To switch models, use the /model command." ) diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py index f95695fb..02eec3f6 100644 --- a/agent/core/llm_params.py +++ b/agent/core/llm_params.py @@ -171,6 +171,15 @@ def _resolve_llm_params( • ``openai/`` — ``reasoning_effort`` forwarded as a top-level kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``. + • ``bedrock/`` — AWS Bedrock via the Converse adapter; creds + come from the standard ``AWS_*`` env vars. + + • ``vertex_ai/`` / ``gemini/`` — GCP-hosted models. + ``vertex_ai/`` covers Gemini and Anthropic Model Garden models and + reads ``VERTEXAI_PROJECT`` / ``VERTEXAI_LOCATION`` + Application + Default Credentials; ``gemini/`` is Google AI Studio and reads + ``GEMINI_API_KEY``. ``reasoning_effort`` is not forwarded. + • ``ollama/``, ``vllm/``, ``lm_studio/``, and ``llamacpp/`` — local OpenAI-compatible endpoints. The id prefix selects a configurable localhost base URL, and the model suffix is sent @@ -231,6 +240,23 @@ def _resolve_llm_params( # the same way, so we leave it off for now. return {"model": model_name} + if model_name.startswith(("vertex_ai/", "gemini/")): + # GCP-hosted models via LiteLLM: + # ``vertex_ai/`` — Vertex AI. Covers Gemini + # (``vertex_ai/gemini-3.1-pro``, ``vertex_ai/gemini-3.5-flash``) + # and Anthropic models served through Model Garden + # (``vertex_ai/claude-opus-4-...``). + # LiteLLM reads ``VERTEXAI_PROJECT`` / ``VERTEXAI_LOCATION`` from + # the env and authenticates via Application Default Credentials + # (``GOOGLE_APPLICATION_CREDENTIALS`` service-account JSON, or + # ``gcloud auth application-default login``). + # ``gemini/`` — Google AI Studio. Simpler path: LiteLLM + # reads a single ``GEMINI_API_KEY`` from the env. 
+ # As with ``bedrock/``, each provider has its own thinking/effort + # shape, so we don't forward ``reasoning_effort`` here — the probe + # cascade resolves effort to "off" and real calls run without it. + return {"model": model_name} + if model_name.startswith("openai/"): params = {"model": model_name} if reasoning_effort: diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py index 34eaccdd..b88ebe33 100644 --- a/agent/core/model_switcher.py +++ b/agent/core/model_switcher.py @@ -42,6 +42,8 @@ "id": "bedrock/us.anthropic.claude-opus-4-6-v1", "label": "Claude Opus 4.6 via Bedrock", }, + {"id": "vertex_ai/gemini-3.1-pro", "label": "Gemini 3.1 Pro via Vertex AI"}, + {"id": "vertex_ai/gemini-3.5-flash", "label": "Gemini 3.5 Flash via Vertex AI"}, {"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"}, {"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"}, {"id": "zai-org/GLM-5.1", "label": "GLM 5.1"}, @@ -50,7 +52,14 @@ _ROUTING_POLICIES = {"fastest", "cheapest", "preferred"} -_DIRECT_PREFIXES = ("anthropic/", "openai/", *LOCAL_MODEL_PREFIXES) +_DIRECT_PREFIXES = ( + "anthropic/", + "openai/", + "bedrock/", + "vertex_ai/", + "gemini/", + *LOCAL_MODEL_PREFIXES, +) _LOCAL_PROBE_TIMEOUT = 15.0 diff --git a/agent/main.py b/agent/main.py index ac1a40f4..c98fcb49 100644 --- a/agent/main.py +++ b/agent/main.py @@ -1618,6 +1618,12 @@ def cli(): warnings.filterwarnings("ignore", category=DeprecationWarning, module="litellm") # Suppress whoosh invalid escape sequence warnings (third-party, unfixed upstream) warnings.filterwarnings("ignore", category=SyntaxWarning, module="whoosh") + # Suppress Pydantic serializer UserWarnings emitted during LiteLLM serialization (third-party) + warnings.filterwarnings( + "ignore", + message="Pydantic serializer warnings", + category=UserWarning, + ) parser = argparse.ArgumentParser(description="Hugging Face Agent CLI") parser.add_argument( diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py index a7c7b4cd..8b73de9a 100644 
--- a/tests/unit/test_llm_params.py +++ b/tests/unit/test_llm_params.py @@ -32,6 +32,43 @@ def test_openai_max_effort_is_still_rejected(): raise AssertionError("Expected UnsupportedEffortError for max effort") +def test_vertex_ai_gemini_id_passes_through_untouched(): + params = _resolve_llm_params("vertex_ai/gemini-3.5-flash") + + assert params == {"model": "vertex_ai/gemini-3.5-flash"} + + +def test_vertex_ai_anthropic_model_garden_id_passes_through(): + params = _resolve_llm_params("vertex_ai/claude-opus-4-6") + + assert params == {"model": "vertex_ai/claude-opus-4-6"} + + +def test_gemini_ai_studio_id_passes_through_untouched(): + params = _resolve_llm_params("gemini/gemini-3.5-flash") + + assert params == {"model": "gemini/gemini-3.5-flash"} + + +def test_vertex_ai_id_does_not_fall_through_to_hf_router(): + # Regression: before the vertex_ai/ branch existed, this id was mangled + # into ``openai/vertex_ai/...`` and sent to the HF router. + params = _resolve_llm_params("vertex_ai/gemini-3.5-flash") + + assert "api_base" not in params + assert params["model"] == "vertex_ai/gemini-3.5-flash" + + +def test_gemini_id_drops_reasoning_effort_in_non_strict_mode(): + params = _resolve_llm_params( + "gemini/gemini-3.5-flash", + reasoning_effort="high", + strict=False, + ) + + assert params == {"model": "gemini/gemini-3.5-flash"} + + def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch): monkeypatch.delenv("OLLAMA_API_KEY", raising=False) monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")