diff --git a/.env.example b/.env.example index 7585eac7..d11adaaa 100644 --- a/.env.example +++ b/.env.example @@ -167,20 +167,16 @@ EVA_MODEL__LLM=gpt-5.2 # GOOGLE_API_KEY=your_google_api_key_here # ============================================== -# Optional: Realtime / Audio-LLM Configuration +# Optional: Speech-to-Speech / Audio-LLM Configuration # ============================================== -# Only needed if benchmarking speech-to-speech or realtime models. +# Only needed if benchmarking speech-to-speech models. -# EVA_MODEL__REALTIME_MODEL=gpt-realtime-mini -# EVA_MODEL__REALTIME_MODEL_PARAMS='{"voice":"marin"}' +# EVA_MODEL__S2S=openai +# EVA_MODEL__S2S_PARAMS='{"model": "gpt-realtime-mini", "api_key": ""}' # EVA_MODEL__AUDIO_LLM= # EVA_MODEL__AUDIO_LLM_PARAMS='{"url": "", "api_key": ""}' -# Azure Realtime credentials (if using Azure realtime models) -# AZURE_OPENAI_REALTIME_API_KEY= -# AZURE_OPENAI_REALTIME_ENDPOINT= - # ============================================== # Optional: Execution Settings # ============================================== diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2d00e8ff..ad62d66d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -3,7 +3,6 @@ name: Tests on: merge_group: pull_request: - branches: [main] jobs: test: diff --git a/README.md b/README.md index 0ba0d575..2e492373 100644 --- a/README.md +++ b/README.md @@ -26,16 +26,25 @@ Agents that score well on task completion tend to score worse on conversational

Quick Start

+### Cloning the Repository + +If you're only interested in running the latest stable version of EVA, you can clone with `--branch latest`, and optionally speed things up with `--depth 1 --no-tags --single-branch`. +```bash +git clone https://github.com/ServiceNow/eva.git --branch latest --depth 1 --no-tags --single-branch +``` + +Otherwise, for development, you can clone the default branch, `main`. +```bash +git clone https://github.com/ServiceNow/eva.git +``` + ### Installation We recommend using [uv](https://docs.astral.sh/uv/) for fast, reliable dependency management. If you don't have `uv` installed, see the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/). -> [!NOTE] -> This project requires **Python 3.11–3.13** (set via `requires-python` in `pyproject.toml`). `uv` will automatically select a compatible version. If you're using pip, make sure you're running a supported Python version. +This project requires **Python 3.11–3.13** (set via `requires-python` in `pyproject.toml`). `uv` will automatically select a compatible version. If you're using pip, make sure you're running a supported Python version. ```bash -# Clone the repository -git clone https://github.com/ServiceNow/eva.git cd eva # Install all dependencies (uv automatically creates a virtual environment) @@ -46,18 +55,16 @@ cp .env.example .env # Edit .env with your API keys (ELEVENLABS_API_KEY, OPENAI_API_KEY required) ``` -> [!TIP] -> After installation, you can run EVA using either: -> - `eva` — CLI entry point (e.g., `eva --help`) -> - `python main.py` — script at the repo root (e.g., `python main.py --help`) -> -> If using an IDE, point your Python interpreter to `.venv/bin/python` so commands run in the virtual environment automatically. Otherwise, prefix commands with `uv run` or activate the environment with `source .venv/bin/activate`. 
+After installation, you can run EVA using either: +- `eva` — CLI entry point (e.g., `eva --help`) +- `python main.py` — script at the repo root (e.g., `python main.py --help`) + +If using an IDE, point your Python interpreter to `.venv/bin/python` so commands run in the virtual environment automatically. Otherwise, prefix commands with `uv run` or activate the environment with `source .venv/bin/activate`.
Alternative: using pip -> [!NOTE] -> This project requires Python 3.11. If you need to manage multiple Python versions, consider using [pyenv](https://github.com/pyenv/pyenv). +This project requires Python 3.11–3.13. If you need to manage multiple Python versions, consider using [pyenv](https://github.com/pyenv/pyenv). ```bash # Create and activate a virtual environment diff --git a/src/eva/assistant/pipeline/audio_llm_processor.py b/src/eva/assistant/pipeline/audio_llm_processor.py index a9154d4e..bb5b24b3 100644 --- a/src/eva/assistant/pipeline/audio_llm_processor.py +++ b/src/eva/assistant/pipeline/audio_llm_processor.py @@ -19,7 +19,6 @@ import asyncio import base64 import io -import os import time import wave from collections.abc import Awaitable @@ -418,7 +417,7 @@ def __init__( super().__init__(**kwargs) self._audio_collector = audio_collector params = params or {} - self._api_key = params.get("api_key") or os.getenv("OPENAI_API_KEY") + self._api_key = params.get("api_key") self._model = model self._system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT self._sample_rate = sample_rate diff --git a/src/eva/assistant/pipeline/observers.py b/src/eva/assistant/pipeline/observers.py index df1a50d5..a3755d48 100644 --- a/src/eva/assistant/pipeline/observers.py +++ b/src/eva/assistant/pipeline/observers.py @@ -22,6 +22,7 @@ from pipecat.observers.turn_tracking_observer import TurnTrackingObserver from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService from pipecat.services.llm_service import LLMService +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService from pipecat.services.stt_service import STTService from pipecat.services.tts_service import TTSService @@ -31,7 +32,7 @@ logger = get_logger(__name__) -_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService) +_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService, OpenAIRealtimeLLMService) class WallClock(SystemClock): diff --git 
a/src/eva/assistant/pipeline/realtime_llm.py b/src/eva/assistant/pipeline/realtime_llm.py index 7d30bac2..b502b4df 100644 --- a/src/eva/assistant/pipeline/realtime_llm.py +++ b/src/eva/assistant/pipeline/realtime_llm.py @@ -1,6 +1,6 @@ """Instrumented realtime LLM service for correct audit log ordering and timestamps. -Subclasses AzureRealtimeLLMService to intercept raw OpenAI Realtime API events +Subclasses OpenAIRealtimeLLMService to intercept raw OpenAI Realtime API events (speech_started, speech_stopped, transcription.completed, response.done) which have a guaranteed ordering and carry item_id for correlation. @@ -11,17 +11,24 @@ Writing user entries on #3 and assistant entries on #5 guarantees correct order. """ +import struct import time from dataclasses import dataclass from typing import Any, Optional -from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService +from pipecat.frames.frames import Frame, InputAudioRawFrame, VADUserStartedSpeakingFrame, VADUserStoppedSpeakingFrame +from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService from eva.assistant.agentic.audit_log import AuditLog from eva.utils.logging import get_logger logger = get_logger(__name__) +# Audio threshold for detecting speech vs silence +# RMS values below this are considered silence +SILENCE_RMS_THRESHOLD = 10 + @dataclass class _UserTurnRecord: @@ -39,8 +46,20 @@ def _wall_ms() -> str: return str(int(round(time.time() * 1000))) -class InstrumentedRealtimeLLMService(AzureRealtimeLLMService): - """AzureRealtimeLLMService subclass that writes audit log entries with correct ordering and wall-clock timestamps derived from Realtime API events. 
+def _calculate_rms(audio_bytes: bytes) -> float: + """Calculate RMS (root mean square) energy of 16-bit PCM audio.""" + if len(audio_bytes) < 2: + return 0.0 + num_samples = len(audio_bytes) // 2 + samples = struct.unpack(f"<{num_samples}h", audio_bytes[: num_samples * 2]) + if not samples: + return 0.0 + sum_squares = sum(s * s for s in samples) + return (sum_squares / len(samples)) ** 0.5 + + +class InstrumentedRealtimeLLMService(OpenAIRealtimeLLMService): + """OpenAIRealtimeLLMService subclass that writes audit log entries with correct ordering and wall-clock timestamps derived from Realtime API events. All overridden methods call ``super()`` first so that the parent's frame processing (audio playback, interruption handling, metrics, etc.) is fully @@ -61,12 +80,35 @@ def __init__(self, *, audit_log: AuditLog, **kwargs: Any) -> None: # Track whether we're mid-assistant-response (for interruption flushing) self._assistant_responding: bool = False + # Track audio frame timing for VAD delay calculation + self._last_audio_frame_time: Optional[float] = None + self._vad_delay_ms: Optional[int] = None + + async def process_frame(self, frame: Frame, direction: FrameDirection) -> None: + """Track audio frame timing before passing to parent. + + Only updates the timestamp when audio has actual speech content (not silence), + so VAD delay calculation reflects when user actually stopped speaking. + """ + if isinstance(frame, InputAudioRawFrame): + rms = _calculate_rms(frame.audio) + if rms > SILENCE_RMS_THRESHOLD: + self._last_audio_frame_time = time.time() + + await super().process_frame(frame, direction) + async def _handle_evt_speech_started(self, evt: Any) -> None: """Fires when user starts speaking (input_audio_buffer.speech_started). Captures wall-clock start time. Also flushes any in-progress interrupted assistant response before recording the new user turn. 
""" + # Reset VAD tracking for new turn + self._vad_delay_ms = None + + # Broadcast VAD user started speaking frame because realtime VAD does not broadcast it themselves + await self.broadcast_frame(VADUserStartedSpeakingFrame) + # Flush interrupted assistant response if one is in progress if self._assistant_responding and self._current_assistant_transcript_parts: partial_text = "".join(self._current_assistant_transcript_parts) + " [interrupted]" @@ -92,8 +134,21 @@ async def _handle_evt_speech_started(self, evt: Any) -> None: async def _handle_evt_speech_stopped(self, evt: Any) -> None: """Fires when user stops speaking (input_audio_buffer.speech_stopped). - Captures wall-clock end time for the user turn. + Captures wall-clock end time for the user turn and calculates VAD delay. """ + speech_stopped_time = time.time() + + # Calculate VAD delay: time between last audio frame and speech_stopped event + if self._last_audio_frame_time is not None: + self._vad_delay_ms = int((speech_stopped_time - self._last_audio_frame_time) * 1000) + else: + logger.warning("speech_stopped fired but no audio frames were tracked") + self._vad_delay_ms = None + + # Reset audio tracking for next turn + self._last_audio_frame_time = None + + await self.broadcast_frame(VADUserStoppedSpeakingFrame) await super()._handle_evt_speech_stopped(evt) item_id = getattr(evt, "item_id", None) or "" @@ -145,6 +200,7 @@ async def _handle_evt_audio_delta(self, evt: Any) -> None: """Fires for each audio chunk of the assistant response. Captures wall-clock of the *first* delta as assistant response start. + Also logs the full user-perceived response latency including VAD delay. 
""" await super()._handle_evt_audio_delta(evt) @@ -152,6 +208,24 @@ async def _handle_evt_audio_delta(self, evt: Any) -> None: self._assistant_response_start_wall_ms = _wall_ms() self._assistant_responding = True + # Log full user-perceived latency (includes VAD delay) + if self._vad_delay_ms is not None: + # Find the most recent user turn to get speech_stopped time + recent_record = None + for record in self._user_turns.values(): + if record.speech_stopped_wall_ms: + recent_record = record + + if recent_record and recent_record.speech_stopped_wall_ms: + speech_stopped_ms = int(recent_record.speech_stopped_wall_ms) + response_start_ms = int(self._assistant_response_start_wall_ms) + vad_to_response_ms = response_start_ms - speech_stopped_ms + full_latency_ms = vad_to_response_ms + self._vad_delay_ms + logger.debug( + f"Full response latency: {full_latency_ms}ms " + f"(VAD delay: {self._vad_delay_ms}ms + response: {vad_to_response_ms}ms)" + ) + async def _handle_evt_audio_transcript_delta(self, evt: Any) -> None: """Fires for incremental assistant transcript text. @@ -220,6 +294,16 @@ def _reset_assistant_state(self) -> None: self._assistant_response_start_wall_ms = None self._assistant_responding = False + @property + def last_vad_delay_ms(self) -> Optional[int]: + """Return the most recent VAD delay in milliseconds. + + This is the time between when audio frames stopped arriving and when + OpenAI's VAD detected end of speech. Can be used to adjust response + latency measurements to reflect user-perceived latency. 
+ """ + return self._vad_delay_ms + @staticmethod def _response_has_function_calls(evt: Any) -> bool: """Return True if the response.done event contains any function_call outputs.""" diff --git a/src/eva/assistant/pipeline/services.py b/src/eva/assistant/pipeline/services.py index 1fcdf76d..c8ee3eff 100644 --- a/src/eva/assistant/pipeline/services.py +++ b/src/eva/assistant/pipeline/services.py @@ -4,7 +4,6 @@ """ import datetime -import os from typing import Any, AsyncGenerator, Optional from deepgram import LiveOptions @@ -20,7 +19,6 @@ AssemblyAIConnectionParams, AssemblyAISTTService, ) -from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService from pipecat.services.cartesia.stt import CartesiaLiveOptions, CartesiaSTTService from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService @@ -37,12 +35,14 @@ SemanticTurnDetection, SessionProperties, ) +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService from pipecat.services.openai.stt import OpenAISTTService from pipecat.services.openai.tts import VALID_VOICES, OpenAITTSService from pipecat.services.stt_service import STTService from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_filter import BaseTextFilter +from websockets.asyncio.client import connect as websocket_connect from eva.assistant.pipeline.alm_vllm import ALMvLLMClient from eva.assistant.pipeline.nvidia_baseten import BasetenSTTService, BasetenTTSService @@ -381,6 +381,15 @@ def create_realtime_llm_service( """ model_lower = (model or "").lower() + # Get realtime server prompt + prompt_manager = PromptManager() + system_prompt = prompt_manager.get_prompt( + "realtime_agent.system_prompt", + agent_personality=agent.description, + agent_instructions=agent.instructions, + datetime=current_date_time, + ) + openai_tools = agent.build_tools_for_realtime() if agent else 
None # Convert OpenAI format tools to pipecat format @@ -400,66 +409,66 @@ def create_realtime_llm_service( ) pipecat_tools = ToolsSchema(standard_tools=function_schemas) - # Get realtime server prompt - prompt_manager = PromptManager() - system_prompt = prompt_manager.get_prompt( - "realtime_agent.system_prompt", - agent_personality=agent.description, - agent_instructions=agent.instructions, - datetime=current_date_time, - ) - - if model_lower.startswith("gpt-realtime"): - # - # base_url =The full Azure WebSocket endpoint URL including api-version and deployment. - # Example: "wss://my-project.openai.azure.com/openai/v1/realtime" - url = os.environ.get("AZURE_OPENAI_REALTIME_ENDPOINT", "") - url += f"?model={model_lower}" - - session_properties = SessionProperties( - instructions=system_prompt, - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(model="whisper-1"), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - # noise_reduction=InputAudioNoiseReduction(type="near_field"), - ), - output=AudioOutput( - voice=params.get("voice", "marin"), + if model_lower.startswith("openai"): + session_properties = get_openai_session_properties(system_prompt, params, pipecat_tools) + if audit_log is not None: + logger.info(f"Using InstrumentedRealtimeLLMService for audit log interception: openai: {params['model']}") + return InstrumentedRealtimeLLMService( + settings=OpenAIRealtimeLLMService.Settings( + model=params["model"], + session_properties=session_properties, ), + audit_log=audit_log, + api_key=params["api_key"], + ) + + return OpenAIRealtimeLLMService( + api_key=params["api_key"], + settings=OpenAIRealtimeLLMService.Settings( + model=params["model"], + session_properties=session_properties, ), - tools=pipecat_tools, - tool_choice="auto", ) - 
logger.info(f"Using Azure Realtime LLM: {model_lower}") + elif model_lower.startswith("azure") or model_lower.startswith("gpt-realtime"): + # + # base_url: The full Azure WebSocket endpoint URL including api-version and deployment. + # Example: "wss://my-project.openai.azure.com/openai/v1/realtime" + url = params.get("url", "") + session_properties = get_openai_session_properties(system_prompt, params, pipecat_tools) + + logger.info(f"Using Azure Realtime LLM: {model_lower}, url {url}") if audit_log is not None: logger.info("Using InstrumentedRealtimeLLMService for audit log interception") - return InstrumentedRealtimeLLMService( - model=model_lower, + service = InstrumentedRealtimeLLMService( audit_log=audit_log, - api_key=os.environ.get("AZURE_OPENAI_REALTIME_API_KEY"), + api_key=params["api_key"], base_url=url, session_properties=session_properties, + settings=OpenAIRealtimeLLMService.Settings( + model=params["model"], + session_properties=session_properties, + ), ) + InstrumentedRealtimeLLMService._connect = override__connect # azure realtime connect + return service - return AzureRealtimeLLMService( - api_key=os.environ.get("AZURE_OPENAI_REALTIME_API_KEY"), + return OpenAIRealtimeLLMService( + api_key=params["api_key"], + model=params["model"], base_url=url, session_properties=session_properties, ) elif model_lower == "ultravox": + logger.info("Using Ultravox LLM") return UltravoxRealtimeLLMService( params=OneShotInputParams( - api_key=os.getenv("ULTRAVOX_API_KEY"), + api_key=params["api_key"], system_prompt=system_prompt, temperature=0.3, max_duration=datetime.timedelta(minutes=6), voice=params.get("voice", "03e20d03-35e4-43c4-bb18-9b18a2cd3086"), + model=params["model"], ), one_shot_selected_tools=pipecat_tools, ) @@ -468,6 +477,27 @@ def create_realtime_llm_service( raise ValueError(f"Unknown realtime model: {model}. 
Available: gpt-realtime, ultravox") +def get_openai_session_properties(system_prompt: str, params: dict, pipecat_tools) -> SessionProperties: + """Create openai compatible session properties object.""" + return SessionProperties( + instructions=system_prompt, + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription( + model=params.get("transcription_model", "gpt-4o-mini-transcribe") + ), + # Set openai TurnDetection parameters. Not setting this at all will turn it on by default + turn_detection=SemanticTurnDetection(), + ), + output=AudioOutput( + voice=params.get("voice", "marin"), + ), + ), + tools=pipecat_tools, + tool_choice="auto", + ) + + def create_audio_llm_client( model: str, params: dict[str, Any], @@ -573,6 +603,27 @@ async def override_run_tts(self, text: str, context_id: str) -> AsyncGenerator[F yield ErrorFrame(error=f"Unknown error occurred: {e}") +async def override__connect(self): + # Allow connections to azure / other providers using a base_url + try: + if self._websocket: + # Here we assume that if we have a websocket, we are connected. We + # handle disconnections in the send/recv code paths. 
+ return + + logger.info(f"Connecting to {self.base_url}") + self._websocket = await websocket_connect( + uri=self.base_url, + additional_headers={ + "api-key": self.api_key, + }, + ) + self._receive_task = self.create_task(self._receive_task_handler()) + except Exception as e: + await self.push_error(error_msg=f"initialization error: {e}", exception=e) + self._websocket = None + + # Unicode to ASCII replacements for TTS _TTS_CHAR_MAP = str.maketrans( { diff --git a/src/eva/assistant/server.py b/src/eva/assistant/server.py index 57a0fc2e..4282e894 100644 --- a/src/eva/assistant/server.py +++ b/src/eva/assistant/server.py @@ -326,7 +326,10 @@ async def _realtime_tool_handler(params) -> None: "smart_turn_stop_secs", 0.8 ) # Shorter silence so we don't have to wait 3s if smart turn marks audio as incomplete - if isinstance(self.pipeline_config, PipelineConfig) and self.pipeline_config.turn_strategy == "external": + if ( + isinstance(self.pipeline_config, (PipelineConfig, SpeechToSpeechConfig)) + and self.pipeline_config.turn_strategy == "external" + ): logger.info("Using external user turn strategies") user_turn_strategies = ExternalUserTurnStrategies() vad_analyzer = None @@ -444,9 +447,29 @@ async def on_user_transcription(text: str, timestamp: str, turn_id: int | None) self._latency_measurements = [] async def on_latency_measured(observer, latency_seconds: float): - """Event handler for UserBotLatencyObserver - stores latency measurements.""" - self._latency_measurements.append(latency_seconds) - logger.debug(f"Response latency captured: {latency_seconds:.3f}s") + """Event handler for UserBotLatencyObserver - stores latency measurements. + + For realtime LLM, adds VAD delay to get full user-perceived latency. + For pipecat VAD (non-realtime), uses the latency as-is. 
+ """ + adjusted_latency = latency_seconds + + # Add VAD delay for realtime LLM to get full user-perceived latency + if isinstance(realtime_llm, InstrumentedRealtimeLLMService): + vad_delay_ms = realtime_llm.last_vad_delay_ms + if vad_delay_ms is not None: + vad_delay_s = vad_delay_ms / 1000.0 + adjusted_latency = latency_seconds + vad_delay_s + logger.debug( + f"Response latency captured: {adjusted_latency:.3f}s " + f"(VAD delay: {vad_delay_s:.3f}s + pipecat: {latency_seconds:.3f}s)" + ) + else: + logger.debug(f"Response latency captured: {latency_seconds:.3f}s (no VAD delay available)") + else: + logger.debug(f"Response latency captured: {latency_seconds:.3f}s") + + self._latency_measurements.append(adjusted_latency) user_bot_observer = UserBotLatencyObserver() user_bot_observer.add_event_handler("on_latency_measured", on_latency_measured) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 474d29a8..e08783bd 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -12,6 +12,8 @@ ``RunConfig(_env_file=".env", _cli_parse_args=True)``. 
""" +import copy +import logging from datetime import UTC, datetime from pathlib import Path from typing import Annotated, Any, ClassVar, Literal @@ -34,9 +36,12 @@ from eva.models.provenance import RunProvenance +logger = logging.getLogger(__name__) -def current_date_and_time(): - return f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}" + +def _param_alias(params: dict[str, Any]) -> str: + """Return the display alias from a params dict.""" + return params.get("alias") or params["model"] class PipelineConfig(BaseModel): @@ -73,6 +78,15 @@ class PipelineConfig(BaseModel): ), ) + @property + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return { + "stt": _param_alias(self.stt_params), + "llm": self.llm, + "tts": _param_alias(self.tts_params), + } + @model_validator(mode="before") @classmethod def _migrate_legacy_fields(cls, data: Any) -> Any: @@ -97,6 +111,22 @@ class SpeechToSpeechConfig(BaseModel): s2s: str = Field(description="Speech-to-speech model name", examples=["gpt-realtime-mini", "gemini_live"]) s2s_params: dict[str, Any] = Field({}, description="Additional speech-to-speech model parameters (JSON)") + turn_strategy: Literal["smart", "external"] = Field( + "smart", + description=( + "User turn detection strategy. " + "'smart' uses LocalSmartTurnAnalyzerV3 + SileroVAD (default). " + "'external' uses ExternalUserTurnStrategies for services with built-in turn detection " + "(e.g., deepgram-flux, Speechmatics). " + "Set via EVA_MODEL__TURN_STRATEGY=external." + ), + ) + + @property + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return {"s2s": _param_alias(self.s2s_params) or self.s2s} + class AudioLLMConfig(BaseModel): """Configuration for an Audio-LLM pipeline (audio in, text out, separate TTS). 
@@ -118,6 +148,14 @@ class AudioLLMConfig(BaseModel): tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"]) tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)") + @property + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return { + "audio_llm": _param_alias(self.audio_llm_params) or self.audio_llm, + "tts": _param_alias(self.tts_params) or self.tts, + } + _PIPELINE_FIELDS = { "llm", @@ -129,7 +167,7 @@ class AudioLLMConfig(BaseModel): *PipelineConfig._LEGACY_RENAMES, *PipelineConfig._LEGACY_DROP, } -_S2S_FIELDS = {"s2s", "s2s_params"} +_S2S_FIELDS = {"s2s", "s2s_params", "turn_strategy"} _AUDIO_LLM_FIELDS = {"audio_llm", "audio_llm_params", "tts", "tts_params"} @@ -269,6 +307,18 @@ class RunConfig(BaseSettings): "EVA_METRICS_TO_RUN": "EVA_METRICS", } + # Maps *_params field names to their provider field for env override logic + _PARAMS_TO_PROVIDER: ClassVar[dict[str, str]] = { + "stt_params": "stt", + "tts_params": "tts", + "s2s_params": "s2s", + "audio_llm_params": "audio_llm", + } + # Keys always read from the live environment (not persisted across runs) + _ENV_OVERRIDE_KEYS: ClassVar[set[str]] = {"url", "urls"} + # Substrings that identify secret keys (redacted in logs and config.json) + _SECRET_KEY_PATTERNS: ClassVar[set[str]] = {"key", "credentials", "secret"} + class ModelDeployment(DeploymentTypedDict): """DeploymentTypedDict that preserves extra keys in litellm_params.""" @@ -283,7 +333,7 @@ class ModelDeployment(DeploymentTypedDict): # Run identifier run_id: str = Field( - default_factory=current_date_and_time, + "timestamp and model name(s)", # Overwritten by _set_default_run_id() description="Run identifier, auto-generated if not provided", ) @@ -441,22 +491,31 @@ def _warn_deprecated_aliases(cls, data: Any) -> Any: @model_validator(mode="after") def _check_companion_services(self) -> "RunConfig": """Ensure required companion services are set for 
each pipeline mode.""" + required_keys = ["api_key", "model"] if isinstance(self.model, PipelineConfig): - self._validate_service_params("STT", self.model.stt, self.model.stt_params) - self._validate_service_params("TTS", self.model.tts, self.model.tts_params) + self._validate_service_params("STT", self.model.stt, required_keys, self.model.stt_params) + self._validate_service_params("TTS", self.model.tts, required_keys, self.model.tts_params) elif isinstance(self.model, AudioLLMConfig): - self._validate_service_params("TTS", self.model.tts, self.model.tts_params) + self._validate_service_params("TTS", self.model.tts, required_keys, self.model.tts_params) + self._validate_service_params("audio_llm", self.model.audio_llm, required_keys, self.model.audio_llm_params) + elif isinstance(self.model, SpeechToSpeechConfig): + # api_key is required, some s2s services don't require model + self._validate_service_params("S2S", self.model.s2s, required_keys, self.model.s2s_params) return self - # Providers that manage their own model/key resolution (e.g. 
WebSocket-based) - _SKIP_PARAMS_VALIDATION: ClassVar[set[str]] = {"nvidia"} + @model_validator(mode="after") + def _set_default_run_id(self) -> "RunConfig": + if "run_id" not in self.model_fields_set: + suffix = "_".join(v for v in self.model.pipeline_parts.values() if v) + self.run_id = f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}_{suffix}" + return self @classmethod - def _validate_service_params(cls, service: str, provider: str, params: dict[str, Any]) -> None: + def _validate_service_params( + cls, service: str, provider: str, required_keys: list[str], params: dict[str, Any] + ) -> None: """Validate that STT/TTS params contain required keys.""" - if provider.lower() in cls._SKIP_PARAMS_VALIDATION: - return - missing = [key for key in ("api_key", "model") if key not in params] + missing = [key for key in required_keys if key not in params] if missing: missing_str = " and ".join(f'"{k}"' for k in missing) env_var = f"EVA_MODEL__{service}_PARAMS" @@ -485,20 +544,131 @@ def _expand_metrics_all(cls, v: list[str] | None) -> list[str] | None: return [m for m in get_global_registry().list_metrics() if m not in cls._VALIDATION_METRIC_NAMES] return v + @classmethod + def _is_secret_key(cls, key: str) -> bool: + """Return True if *key* matches any pattern in _SECRET_KEY_PATTERNS.""" + return any(pattern in key for pattern in cls._SECRET_KEY_PATTERNS) + + @classmethod + def _redact_dict(cls, params: dict) -> dict: + """Return a copy of *params* with secret values replaced by ``***``.""" + return {k: "***" if cls._is_secret_key(k) else v for k, v in params.items()} + @field_serializer("model_list") @classmethod def _redact_model_list(cls, deployments: list[ModelDeployment]) -> list[dict]: """Redact secret values in litellm_params when serializing.""" redacted = [] for deployment in deployments: + deployment = copy.deepcopy(deployment) if "litellm_params" in deployment: - params = deployment["litellm_params"] - for key in params: - if "key" in key or "credentials" in key: - 
params[key] = "***" + deployment["litellm_params"] = cls._redact_dict(deployment["litellm_params"]) redacted.append(deployment) return redacted + @field_serializer("model") + @classmethod + def _redact_model_params(cls, model: ModelConfigUnion) -> dict: + """Redact secret values in STT/TTS/S2S/AudioLLM params when serializing.""" + data = model.model_dump(mode="json") + for field_name, value in data.items(): + if field_name.endswith("_params") and isinstance(value, dict): + data[field_name] = cls._redact_dict(value) + return data + + def apply_env_overrides(self, live: "RunConfig") -> None: + """Apply environment-dependent values from *live* config onto this (saved) config. + + Restores redacted secrets (``***``) and overrides dynamic fields (``url``, + ``urls``) in ``model.*_params`` and ``model_list[].litellm_params``. + + Raises: + ValueError: If provider or alias differs for a service with redacted secrets. + """ + # ── model.*_params (STT / TTS / S2S / AudioLLM) ── + for params_field, provider_field in self._PARAMS_TO_PROVIDER.items(): + saved = getattr(self.model, params_field, None) + source = getattr(live.model, params_field, None) + if not isinstance(saved, dict) or not isinstance(source, dict): + continue + + has_redacted = any(v == "***" for v in saved.values()) + has_env_overrides = any(k in saved or k in source for k in self._ENV_OVERRIDE_KEYS) + if not has_redacted and not has_env_overrides: + continue + + if has_redacted: + saved_alias = saved.get("alias") + live_alias = source.get("alias") + if saved_alias and live_alias and saved_alias != live_alias: + raise ValueError( + f"Cannot restore secrets: saved {params_field}[alias]={saved_alias!r} " + f"but current environment has {params_field}[alias]={live_alias!r}" + ) + + saved_provider = getattr(self.model, provider_field, None) + live_provider = getattr(live.model, provider_field, None) + if saved_provider != live_provider: + logger.warning( + f"Provider mismatch for {params_field}: saved 
{saved_provider!r}, " + f"current environment has {live_provider!r}" + ) + + saved_model = saved.get("model") + live_model = source.get("model") + if saved_model and live_model and saved_model != live_model: + logger.warning( + f"Model mismatch for {params_field}: saved {saved_model!r}, " + f"current environment has {live_model!r}" + ) + + for key, value in saved.items(): + if value == "***" and key in source: + saved[key] = source[key] + + # Always use url/urls from the live environment + for key in self._ENV_OVERRIDE_KEYS: + if key in source: + saved_val = saved.get(key) + if saved_val and saved_val != source[key]: + logger.warning( + f"{params_field}[{key}] differs: saved {saved_val!r}, " + f"using {source[key]!r} from current environment" + ) + saved[key] = source[key] + + # ── model_list[].litellm_params (LLM deployments) ── + live_by_name = {d["model_name"]: d for d in live.model_list if "model_name" in d} + for deployment in self.model_list: + name = deployment.get("model_name") + if not name: + continue + saved_params = deployment.get("litellm_params", {}) + has_redacted = any(v == "***" for v in saved_params.values()) + if not has_redacted: + continue + if name not in live_by_name: + raise ValueError( + f"Cannot restore secrets: deployment {name!r} not found in " + f"current EVA_MODEL_LIST (available: {list(live_by_name)})" + ) + live_params = live_by_name[name].get("litellm_params", {}) + for key, value in saved_params.items(): + if value == "***" and key in live_params: + saved_params[key] = live_params[key] + + # ── Log resolved configuration ── + for params_field, provider_field in self._PARAMS_TO_PROVIDER.items(): + params = getattr(self.model, params_field, None) + provider = getattr(self.model, provider_field, None) + if isinstance(params, dict) and params: + logger.info(f"Resolved {provider_field} ({provider}): {self._redact_dict(params)}") + + for deployment in self.model_list: + name = deployment.get("model_name", "?") + params = 
deployment.get("litellm_params", {}) + logger.info(f"Resolved deployment {name}: {self._redact_dict(params)}") + @classmethod def from_yaml(cls, path: Path | str) -> "RunConfig": """Load configuration from YAML file.""" diff --git a/src/eva/orchestrator/runner.py b/src/eva/orchestrator/runner.py index f92d98af..ac5a45f3 100644 --- a/src/eva/orchestrator/runner.py +++ b/src/eva/orchestrator/runner.py @@ -138,7 +138,10 @@ async def run(self, records: list[EvaluationRecord]) -> RunResult: } config_path = self.output_dir / "config.json" - config_path.write_text(self.config.model_dump_json(indent=2)) + config_data = self.config.model_dump(mode="json") + pipeline_parts = self.config.model.pipeline_parts + config_data["pipeline_parts"] = pipeline_parts + config_path.write_text(json.dumps(config_data, indent=2)) # Build output_id list for tracking (supports pass@k) num_trials = self.config.num_trials diff --git a/src/eva/run_benchmark.py b/src/eva/run_benchmark.py index 92d32b01..49096448 100644 --- a/src/eva/run_benchmark.py +++ b/src/eva/run_benchmark.py @@ -42,6 +42,9 @@ async def run_benchmark(config: RunConfig) -> int: logger.error(str(e)) return 1 + # Apply env-dependent values (secrets, urls) from live env onto saved config + runner.config.apply_env_overrides(config) + # Apply CLI overrides runner.config.max_rerun_attempts = config.max_rerun_attempts runner.config.force_rerun_metrics = config.force_rerun_metrics diff --git a/src/eva/utils/prompt_manager.py b/src/eva/utils/prompt_manager.py index 2216fddc..56971149 100644 --- a/src/eva/utils/prompt_manager.py +++ b/src/eva/utils/prompt_manager.py @@ -121,7 +121,7 @@ def get_prompt(self, path: str, **variables) -> str: return value.format(**formatted_vars) except KeyError as e: raise KeyError( - "Missing variable {e} for prompt '{path}'. Available variables: {sorted(formatted_vars.keys())}" + f"Missing variable {e} for prompt '{path}'. 
Available variables: {sorted(formatted_vars.keys())}" ) from e diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 8248c39f..50f22c73 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -2,7 +2,6 @@ import json import os -from datetime import datetime from pathlib import Path from unittest.mock import MagicMock, patch @@ -56,6 +55,10 @@ "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "test_key", "model": "nova-2"}), "EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "test_key", "model": "sonic"}), } +_S2S_ENV = _EVA_MODEL_LIST_ENV | { + "EVA_MODEL__S2S": "gpt-realtime-mini", + "EVA_MODEL__S2S_PARAMS": json.dumps({"api_key": "", "model": "test"}), +} def _config( @@ -74,6 +77,12 @@ def _config( return RunConfig(_env_file=env_file, _cli_parse_args=cli_args, **kwargs) +def _load_json_into_runconfig(json_str: str) -> RunConfig: + """Load RunConfig from JSON with isolated environment (no real env vars).""" + with patch.dict(os.environ, {}, clear=True): + return RunConfig.model_validate_json(json_str) + + class TestRunConfig: def test_create_minimal_config(self): """Test creating a minimal RunConfig.""" @@ -81,7 +90,8 @@ def test_create_minimal_config(self): assert config.dataset_path == Path("data/airline_dataset.jsonl") assert config.tool_mocks_path == Path("data/airline_scenarios") - assert datetime.strptime(config.run_id, "%Y-%m-%d_%H-%M-%S.%f") + # run_id = timestamp + model suffix (e.g. 
"2024-01-15_14-30-45.123456_nova-2_gpt-5.2_sonic") + assert config.run_id.endswith("nova-2_gpt-5.2_sonic") assert config.max_concurrent_conversations == 1 assert config.conversation_timeout_seconds == 360 @@ -160,13 +170,172 @@ def test_indentation_in_model_list(self, tmp_path: Path, vars_location: str, ind assert config.model_list == MODEL_LIST def test_secrets_redacted(self): - """Secrets are redacted in model_list.""" + """Secrets are redacted in model_list and STT/TTS params.""" config = _config(env_vars=_BASE_ENV) dumped = config.model_dump(mode="json") assert dumped["model_list"][0]["litellm_params"]["api_key"] == "***" assert dumped["model_list"][1]["litellm_params"]["vertex_credentials"] == "***" assert dumped["model_list"][2]["litellm_params"]["aws_access_key_id"] == "***" assert dumped["model_list"][2]["litellm_params"]["aws_secret_access_key"] == "***" + # STT/TTS params api_key must also be redacted + assert dumped["model"]["stt_params"]["api_key"] == "***" + assert dumped["model"]["tts_params"]["api_key"] == "***" + # Non-secret fields preserved + assert dumped["model"]["stt_params"]["model"] == "nova-2" + assert dumped["model"]["tts_params"]["model"] == "sonic" + + def test_secrets_redaction_does_not_mutate_live_config(self): + """Serializing must not corrupt the in-memory config objects.""" + config = _config(env_vars=_BASE_ENV) + config.model_dump(mode="json") + # model_list keys must still hold real values + assert config.model_list[0]["litellm_params"]["api_key"] == "must_be_redacted" + assert config.model_list[1]["litellm_params"]["vertex_credentials"] == "must_be_redacted" + # STT/TTS params must still hold real values + assert config.model.stt_params["api_key"] == "test_key" + assert config.model.tts_params["api_key"] == "test_key" + + def test_apply_env_overrides(self): + """Redacted secrets are restored from a live config for both model and model_list.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded 
= _load_json_into_runconfig(dumped_json) + + # Everything is redacted after round-trip + assert loaded.model.stt_params["api_key"] == "***" + assert loaded.model.tts_params["api_key"] == "***" + assert loaded.model_list[0]["litellm_params"]["api_key"] == "***" + assert loaded.model_list[1]["litellm_params"]["vertex_credentials"] == "***" + assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "***" + + loaded.apply_env_overrides(config) + + # STT/TTS params restored + assert loaded.model.stt_params["api_key"] == "test_key" + assert loaded.model.tts_params["api_key"] == "test_key" + assert loaded.model.stt_params["model"] == "nova-2" + # model_list restored + assert loaded.model_list[0]["litellm_params"]["api_key"] == "must_be_redacted" + assert loaded.model_list[1]["litellm_params"]["vertex_credentials"] == "must_be_redacted" + assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "must_be_redacted" + assert loaded.model_list[2]["litellm_params"]["aws_secret_access_key"] == "must_be_redacted" + + def test_apply_env_overrides_provider_mismatch(self, caplog): + """Restoring secrets warns (but succeeds) if the STT/TTS provider changed.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + live = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT": "openai_whisper", + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "whisper-1"}), + } + ) + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.apply_env_overrides(live) + assert "Provider mismatch for stt_params" in caplog.text + assert "deepgram" in caplog.text + assert "openai_whisper" in caplog.text + + def test_apply_env_overrides_alias_mismatch(self): + """Restoring secrets fails if the alias changed.""" + config = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "alias": "stt-v1"}), + } + ) + dumped_json = 
config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + live = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "alias": "stt-v2"}), + } + ) + with pytest.raises( + ValueError, + match=r"saved stt_params\[alias\]='stt-v1'.*current environment has stt_params\[alias\]='stt-v2'", + ): + loaded.apply_env_overrides(live) + + def test_apply_env_overrides_model_mismatch_warns(self, caplog): + """Restoring secrets warns (but succeeds) if the STT/TTS model changed.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + live = _config(env_vars=_BASE_ENV | {"EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic-2"})}) + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.apply_env_overrides(live) + assert "sonic" in caplog.text + assert "sonic-2" in caplog.text + assert loaded.model.tts_params["api_key"] == "k" + + def test_apply_env_overrides_url_from_env(self, caplog): + """Url is always taken from the live env, with a warning if it differs.""" + saved_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://old-host/stt"}), + } + config = _config(env_vars=saved_env) + dumped_json = config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + # Live env has a different url + live_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://new-host/stt"}), + } + live = _config(env_vars=live_env) + + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.apply_env_overrides(live) + + assert loaded.model.stt_params["url"] == "wss://new-host/stt" + assert "wss://old-host/stt" in caplog.text + assert "wss://new-host/stt" in caplog.text + + def test_apply_env_overrides_url_added_from_env(self): + """Url from live env is added even if the saved config 
didn't have one.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + live_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://new-host/stt"}), + } + live = _config(env_vars=live_env) + loaded.apply_env_overrides(live) + + assert loaded.model.stt_params["url"] == "wss://new-host/stt" + + def test_apply_env_overrides_llm_deployment_mismatch(self): + """Restoring secrets fails if a saved LLM deployment is missing from the live model_list.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = _load_json_into_runconfig(dumped_json) + + # Live config has a different model_list (only one deployment, different name) + different_model_list = [ + { + "model_name": "gpt-4o", + "litellm_params": {"model": "openai/gpt-4o", "api_key": "real_key"}, + } + ] + live = _config( + env_vars={ + "EVA_MODEL_LIST": json.dumps(different_model_list), + "EVA_MODEL__LLM": "gpt-4o", + "EVA_MODEL__STT": "deepgram", + "EVA_MODEL__TTS": "cartesia", + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2"}), + "EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic"}), + } + ) + with pytest.raises(ValueError, match=r"deployment 'gpt-5.2' not found in current EVA_MODEL_LIST"): + loaded.apply_env_overrides(live) @pytest.mark.parametrize( "environ, expected_exception, expected_message", @@ -287,20 +456,6 @@ def test_missing_stt_tts_params(self): } ) - def test_nvidia_stt_skips_params_validation(self): - """NVIDIA STT skips api_key/model validation (uses url-based config).""" - config = _config( - env_vars=_EVA_MODEL_LIST_ENV - | { - "EVA_MODEL__LLM": "gpt-5.2", - "EVA_MODEL__STT": "nvidia", - "EVA_MODEL__TTS": "cartesia", - "EVA_MODEL__STT_PARAMS": json.dumps({"url": "ws://localhost:8000"}), - "EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic"}), - } - ) - assert 
config.model.stt == "nvidia" - class TestDefaults: """Verify default values match expectations.""" @@ -356,14 +511,14 @@ class TestDeprecatedEnvVars: lambda c: c.model.tts, ), ( - _EVA_MODEL_LIST_ENV, + _S2S_ENV, "REALTIME_MODEL", "EVA_MODEL__S2S", "test-model", lambda c: c.model.s2s, ), ( - _EVA_MODEL_LIST_ENV, + _S2S_ENV, "EVA_MODEL__REALTIME_MODEL", "EVA_MODEL__S2S", "test-model", @@ -384,17 +539,17 @@ class TestDeprecatedEnvVars: lambda c: c.model.tts_params, ), ( - _EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "test-model"}, + _S2S_ENV, "REALTIME_MODEL_PARAMS", "EVA_MODEL__S2S_PARAMS", - {"foo": "bar"}, + {"api_key": "k", "model": "model"}, lambda c: c.model.s2s_params, ), ( - _EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "test-model"}, + _S2S_ENV, "EVA_MODEL__REALTIME_MODEL_PARAMS", "EVA_MODEL__S2S_PARAMS", - {"foo": "bar"}, + {"api_key": "k", "model": "model"}, lambda c: c.model.s2s_params, ), ( @@ -581,7 +736,7 @@ def test_tts_model(self): assert c.model.tts == "cartesia" def test_realtime_model(self): - config = _config(env_vars=_EVA_MODEL_LIST_ENV, cli_args=["--realtime-model", "test-model"]) + config = _config(env_vars=_S2S_ENV, cli_args=["--realtime-model", "test-model"]) assert config.model.s2s == "test-model" def test_run_id(self): @@ -652,20 +807,39 @@ class TestSpeechToSpeechConfig: def test_s2s_config_from_env(self): """EVA_MODEL__S2S selects SpeechToSpeechConfig.""" - config = _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "gpt-realtime-mini"}) + config = _config( + env_vars=_EVA_MODEL_LIST_ENV + | { + "EVA_MODEL__S2S": "gpt-realtime-mini", + "EVA_MODEL__S2S_PARAMS": json.dumps({"api_key": "", "model": "gpt-realtime-mini"}), + } + ) assert isinstance(config.model, SpeechToSpeechConfig) assert config.model.s2s == "gpt-realtime-mini" def test_s2s_config_from_cli(self): """--s2s-model selects SpeechToSpeechConfig.""" - config = _config(env_vars=_EVA_MODEL_LIST_ENV, cli_args=["--model.s2s", "gemini_live"]) + config = _config( + 
env_vars=_EVA_MODEL_LIST_ENV, + cli_args=[ + "--model.s2s", + "gemini_live", + "--model.s2s-params", + '{"api_key": "test-key", "model": "gemini_live"}', + ], + ) assert isinstance(config.model, SpeechToSpeechConfig) assert config.model.s2s == "gemini_live" + assert config.model.s2s_params == {"api_key": "test-key", "model": "gemini_live"} def test_s2s_config_with_params(self): """S2S params are passed through.""" config = _config( - env_vars=_EVA_MODEL_LIST_ENV, model={"s2s": "gpt-realtime-mini", "s2s_params": {"voice": "alloy"}} + env_vars=_EVA_MODEL_LIST_ENV, + model={ + "s2s": "gpt-realtime-mini", + "s2s_params": {"voice": "alloy", "api_key": "key_1", "model": "gpt-realtime-mini"}, + }, ) assert isinstance(config.model, SpeechToSpeechConfig) - assert config.model.s2s_params == {"voice": "alloy"} + assert config.model.s2s_params == {"voice": "alloy", "api_key": "key_1", "model": "gpt-realtime-mini"}