From 59ef7cba79e23159c336ed0180f653ecdfaa763b Mon Sep 17 00:00:00 2001 From: Anjan <743179+t-anjan@users.noreply.github.com> Date: Thu, 8 Jan 2026 17:08:43 +0530 Subject: [PATCH] fix: convert scroll_at direction/magnitude to scroll_x/scroll_y in GoogleCUAClient Previously, GoogleCUAClient._process_provider_response was passing the raw direction and magnitude fields from Google CUA's scroll_at function call directly into the ScrollAction payload. This caused Pydantic validation errors because ScrollAction expects scroll_x and scroll_y fields instead. This change: - Safely extracts direction and magnitude with defaults (down, 800) - Converts direction/magnitude to scroll_x/scroll_y values - Handles all four directions (up, down, left, right) with proper sign - Gracefully handles missing or non-numeric magnitude values - Removes the unsupported direction error path The fix now correctly produces ScrollAction payloads that validate and execute properly when scrolling in the browser. --- stagehand/agent/google_cua.py | 45 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/stagehand/agent/google_cua.py b/stagehand/agent/google_cua.py index fc46196a..237ef23f 100644 --- a/stagehand/agent/google_cua.py +++ b/stagehand/agent/google_cua.py @@ -274,32 +274,39 @@ def _process_provider_response( elif action_name == "scroll_at": action_type_str = "scroll" x, y = self._normalize_coordinates(action_args["x"], action_args["y"]) - direction = action_args["direction"].lower() - magnitude = action_args.get("magnitude", 800) - # Denormalize magnitude based on direction - if direction in ("up", "down"): - magnitude = self._normalize_coordinates(0, magnitude)[1] - elif direction in ("left", "right"): - magnitude = self._normalize_coordinates(magnitude, 0)[0] + # Match the TypeScript GoogleCUAClient scroll_at behavior: + # - direction defaults to "down" if missing + # - magnitude defaults to 800 if missing / not a number + direction_raw = action_args.get("direction", "down") + direction = str(direction_raw or "down").lower() + raw_magnitude = action_args.get("magnitude", 800) + magnitude: int + if isinstance(raw_magnitude, (int, float)): + magnitude = int(raw_magnitude) else: - self.logger.error( - f"Unsupported scroll direction: {direction}", category="agent" - ) - return ( - [], - reasoning_text, - True, - f"Unsupported scroll direction: {direction}", - invoked_function_info, - ) + magnitude = 800 + + scroll_x = 0 + scroll_y = 0 + if direction == "up": + scroll_y = -magnitude + elif direction == "down": + scroll_y = magnitude + elif direction == "left": + scroll_x = -magnitude + elif direction == "right": + scroll_x = magnitude + else: + # Default to scrolling down if the direction is unknown + scroll_y = magnitude action_payload_dict = { "type": "scroll", "x": x, "y": y, - "direction": direction, - "magnitude": magnitude, + "scroll_x": scroll_x, + "scroll_y": scroll_y, } elif action_name == "drag_and_drop": action_type_str = "function"