huggingface · gduchidze · May 24, 2026
diff --git a/README.md b/README.md
@@ -33,6 +33,10 @@ Create a `.env` file in the project root (or export these in your shell):
 ```bash
 ANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models
 OPENAI_API_KEY=<your-openai-api-key> # if using openai models
+GEMINI_API_KEY=<your-ai-studio-key> # if using gemini/ (Google AI Studio) models
+VERTEXAI_PROJECT=<gcp-project-id> # if using vertex_ai/ models
+VERTEXAI_LOCATION=us-central1 # GCP region for vertex_ai/ models
+GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json # Application Default Credentials (ADC) for vertex_ai/ models
 LOCAL_LLM_BASE_URL=http://localhost:8000 # shared fallback for local model prefixes
 LOCAL_LLM_API_KEY=<optional-local-api-key> # optional shared local API key
 HF_TOKEN=<your-hugging-face-token>
@@ -61,6 +65,9 @@ ml-intern "fine-tune llama on my dataset"
 ```bash
 ml-intern --model anthropic/claude-opus-4-7 "your prompt"   # requires ANTHROPIC_API_KEY
 ml-intern --model openai/gpt-5.5 "your prompt"              # requires OPENAI_API_KEY
+ml-intern --model vertex_ai/gemini-3.1-pro "your prompt"    # GCP Vertex AI (ADC + VERTEXAI_*)
+ml-intern --model vertex_ai/gemini-3.5-flash "your prompt"  # GCP Vertex AI
+ml-intern --model gemini/gemini-3.5-flash "your prompt"     # Google AI Studio (GEMINI_API_KEY)
 ml-intern --model ollama/llama3.1:8b "your prompt"
 ml-intern --model vllm/meta-llama/Llama-3.1-8B-Instruct "your prompt"
 ml-intern --sandbox-tools "your prompt"                         # use HF Space sandbox tools
@@ -98,6 +105,27 @@ one shared local endpoint, or override a specific provider with its matching
 `VLLM_API_KEY`. Provider-specific variables take precedence over the shared
 local variables. Base URLs may include or omit `/v1`.
 
+**GCP models (Vertex AI / Gemini):**
+
+Google Cloud models route through LiteLLM; no extra dependency is needed:
+
+```text
+/model vertex_ai/gemini-3.1-pro      # Gemini 3.1 Pro on Vertex AI
+/model vertex_ai/gemini-3.5-flash    # Gemini 3.5 Flash on Vertex AI
+/model vertex_ai/claude-opus-4-6     # Anthropic models via Vertex Model Garden
+/model gemini/gemini-3.5-flash       # Google AI Studio (simpler, key-only)
+```
+
+- `vertex_ai/<model>` — full Vertex AI. Set `VERTEXAI_PROJECT` and
+  `VERTEXAI_LOCATION`, and authenticate with Application Default Credentials:
+  either `GOOGLE_APPLICATION_CREDENTIALS` pointing at a service-account JSON,
+  or run `gcloud auth application-default login`. Covers Gemini and the
+  Anthropic models published in Vertex Model Garden.
+- `gemini/<model>` — Google AI Studio. Only needs `GEMINI_API_KEY`.
+
+Reasoning effort is not forwarded to these providers (each uses its own
+thinking/effort parameter format), so calls run without an effort level.
+
 **CLI tool runtime:**
 
 By default, the CLI runs `bash`, `read`, `write`, and `edit` on your local

diff --git a/agent/core/agent_loop.py b/agent/core/agent_loop.py
@@ -558,7 +558,11 @@ def _friendly_error_message(error: Exception) -> str | None:
             "To fix this, set the API key for your model provider:\n"
             "  • Anthropic:   export ANTHROPIC_API_KEY=sk-...\n"
             "  • OpenAI:      export OPENAI_API_KEY=sk-...\n"
-            "  • HF Router:   export HF_TOKEN=hf_...\n\n"
+            "  • HF Router:   export HF_TOKEN=hf_...\n"
+            "  • Gemini (AI Studio):  export GEMINI_API_KEY=...\n"
+            "  • Vertex AI:   export VERTEXAI_PROJECT=... VERTEXAI_LOCATION=...\n"
+            "                 then: gcloud auth application-default login\n"
+            "                 (or export GOOGLE_APPLICATION_CREDENTIALS=/path/sa.json)\n\n"
             "You can also add it to a .env file in the project root.\n"
             "To switch models, use the /model command."
         )

diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py
@@ -171,6 +171,15 @@ def _resolve_llm_params(
     • ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level
       kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.
 
+    • ``bedrock/<model>`` — AWS Bedrock via the Converse adapter; creds
+      come from the standard ``AWS_*`` env vars.
+
+    • ``vertex_ai/<model>`` / ``gemini/<model>`` — GCP-hosted models.
+      ``vertex_ai/`` covers Gemini and Anthropic Model Garden models and
+      reads ``VERTEXAI_PROJECT`` / ``VERTEXAI_LOCATION`` + Application
+      Default Credentials; ``gemini/`` is Google AI Studio and reads
+      ``GEMINI_API_KEY``. ``reasoning_effort`` is not forwarded.
+
     • ``ollama/<model>``, ``vllm/<model>``, ``lm_studio/<model>``, and
       ``llamacpp/<model>`` — local OpenAI-compatible endpoints. The id prefix
       selects a configurable localhost base URL, and the model suffix is sent
@@ -231,6 +240,23 @@ def _resolve_llm_params(
         # the same way, so we leave it off for now.
         return {"model": model_name}
 
+    if model_name.startswith(("vertex_ai/", "gemini/")):
+        # GCP-hosted models via LiteLLM:
+        #   ``vertex_ai/<model>``  — Vertex AI. Covers Gemini
+        #     (``vertex_ai/gemini-3.1-pro``, ``vertex_ai/gemini-3.5-flash``)
+        #     and Anthropic models served through Model Garden
+        #     (``vertex_ai/claude-opus-4-...``).
+        #     LiteLLM reads ``VERTEXAI_PROJECT`` / ``VERTEXAI_LOCATION`` from
+        #     the env and authenticates via Application Default Credentials
+        #     (``GOOGLE_APPLICATION_CREDENTIALS`` service-account JSON, or
+        #     ``gcloud auth application-default login``).
+        #   ``gemini/<model>``     — Google AI Studio. Simpler path: LiteLLM
+        #     reads a single ``GEMINI_API_KEY`` from the env.
+        # As with ``bedrock/``, each provider has its own thinking/effort
+        # shape, so we don't forward ``reasoning_effort`` here — the probe
+        # cascade resolves effort to "off" and real calls run without it.
+        return {"model": model_name}
+
     if model_name.startswith("openai/"):
         params = {"model": model_name}
         if reasoning_effort:

diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py
@@ -42,6 +42,8 @@
         "id": "bedrock/us.anthropic.claude-opus-4-6-v1",
         "label": "Claude Opus 4.6 via Bedrock",
     },
+    {"id": "vertex_ai/gemini-3.1-pro", "label": "Gemini 3.1 Pro via Vertex AI"},
+    {"id": "vertex_ai/gemini-3.5-flash", "label": "Gemini 3.5 Flash via Vertex AI"},
     {"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
     {"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
     {"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
@@ -50,7 +52,14 @@
 
 
 _ROUTING_POLICIES = {"fastest", "cheapest", "preferred"}
-_DIRECT_PREFIXES = ("anthropic/", "openai/", *LOCAL_MODEL_PREFIXES)
+_DIRECT_PREFIXES = (
+    "anthropic/",
+    "openai/",
+    "bedrock/",
+    "vertex_ai/",
+    "gemini/",
+    *LOCAL_MODEL_PREFIXES,
+)
 _LOCAL_PROBE_TIMEOUT = 15.0
 
 

diff --git a/agent/main.py b/agent/main.py
@@ -1618,6 +1618,12 @@ def cli():
     warnings.filterwarnings("ignore", category=DeprecationWarning, module="litellm")
     # Suppress whoosh invalid escape sequence warnings (third-party, unfixed upstream)
     warnings.filterwarnings("ignore", category=SyntaxWarning, module="whoosh")
+    # Suppress Pydantic serializer warnings raised when LiteLLM serializes responses
+    warnings.filterwarnings(
+        "ignore",
+        message="Pydantic serializer warnings",
+        category=UserWarning,
+    )
 
     parser = argparse.ArgumentParser(description="Hugging Face Agent CLI")
     parser.add_argument(

diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py
@@ -32,6 +32,43 @@ def test_openai_max_effort_is_still_rejected():
         raise AssertionError("Expected UnsupportedEffortError for max effort")
 
 
+def test_vertex_ai_gemini_id_passes_through_untouched():
+    params = _resolve_llm_params("vertex_ai/gemini-3.5-flash")
+
+    assert params == {"model": "vertex_ai/gemini-3.5-flash"}
+
+
+def test_vertex_ai_anthropic_model_garden_id_passes_through():
+    params = _resolve_llm_params("vertex_ai/claude-opus-4-6")
+
+    assert params == {"model": "vertex_ai/claude-opus-4-6"}
+
+
+def test_gemini_ai_studio_id_passes_through_untouched():
+    params = _resolve_llm_params("gemini/gemini-3.5-flash")
+
+    assert params == {"model": "gemini/gemini-3.5-flash"}
+
+
+def test_vertex_ai_id_does_not_fall_through_to_hf_router():
+    # Regression: before the vertex_ai/ branch existed, this id was mangled
+    # into ``openai/vertex_ai/...`` and sent to the HF router.
+    params = _resolve_llm_params("vertex_ai/gemini-3.5-flash")
+
+    assert "api_base" not in params
+    assert params["model"] == "vertex_ai/gemini-3.5-flash"
+
+
+def test_gemini_id_drops_reasoning_effort_in_non_strict_mode():
+    params = _resolve_llm_params(
+        "gemini/gemini-3.5-flash",
+        reasoning_effort="high",
+        strict=False,
+    )
+
+    assert params == {"model": "gemini/gemini-3.5-flash"}
+
+
 def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch):
     monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
     monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")