diff --git a/src/microbots/llm/ollama_local.py b/src/microbots/llm/ollama_local.py
index c775631..0ba66d9 100644
--- a/src/microbots/llm/ollama_local.py
+++ b/src/microbots/llm/ollama_local.py
@@ -91,7 +91,8 @@ def _send_request_to_local_model(self, messages):
         headers = {
             "Content-Type": "application/json"
         }
-        response = requests.post(server, json=payload, headers=headers)
+        # Set timeout: 30 seconds connect, 600 seconds read to handle model cold start
+        response = requests.post(server, json=payload, headers=headers, timeout=(30, 600))
         logger.debug(f"\nResponse Code: {response.status_code}\nResponse Text:\n{response.text}\n---")
         if response.status_code == 200:
             response_json = response.json()
diff --git a/test/llm/conftest.py b/test/llm/conftest.py
index 2e75312..a402bfc 100644
--- a/test/llm/conftest.py
+++ b/test/llm/conftest.py
@@ -226,8 +226,48 @@ def ollama_env_config(ollama_model_name, ollama_model_port):
     }
 
 
+@pytest.fixture(scope="session")
+def ollama_model_warmed_up(ollama_server, ollama_env_config):
+    """
+    Warm up the Ollama model by making an initial request.
+
+    This fixture ensures the model is loaded into memory before tests run,
+    which is especially important on CPU-only CI runners where cold starts
+    can cause connection timeouts.
+    """
+    model_name = ollama_env_config["LOCAL_MODEL_NAME"]
+    model_port = ollama_env_config["LOCAL_MODEL_PORT"]
+
+    print(f"\nWarming up Ollama model: {model_name}...")
+
+    try:
+        # Send a simple request to load the model into memory
+        response = requests.post(
+            f"http://localhost:{model_port}/api/generate",
+            json={
+                "model": model_name,
+                "prompt": "hi",
+                "stream": False
+            },
+            headers={"Content-Type": "application/json"},
+            timeout=(30, 600)  # 30s connect, 600s read for model loading
+        )
+
+        if response.status_code == 200:
+            print(f"Model {model_name} warmed up successfully!")
+        else:
+            print(f"Warning: Model warm-up returned status {response.status_code}")
+
+    except requests.exceptions.Timeout:
+        print("Warning: Model warm-up timed out. Tests may experience slow first responses.")
+    except Exception as e:
+        print(f"Warning: Model warm-up failed: {e}. Tests may experience slow first responses.")
+
+    return True
+
+
 @pytest.fixture
-def ollama_local_ready(ollama_server, ollama_env_config, monkeypatch):
+def ollama_local_ready(ollama_server, ollama_env_config, ollama_model_warmed_up, monkeypatch):
     """
     Complete setup fixture that ensures Ollama is ready for testing.