From cbe9a19b32ff9f8631b02b34bf59cfa6b6b803fe Mon Sep 17 00:00:00 2001 From: Pete Hunt Date: Sun, 22 Mar 2026 14:38:43 -0400 Subject: [PATCH] feat: chain batching, mouse easing, screenshot dedup, network idle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - chain is now the primary interface for browser interactions: executes actions sequentially, waits for network idle, then auto-appends an observation block (snapshot -i + page state: URL, title, viewport, focus, dialog state) - Write commands (click, fill, goto, etc.) removed from standalone dispatch — only callable through chain. Server rejects standalone writes with guidance to use chain. - click/hover use cubic-eased mouse movement (3-40 steps, 30-300ms) from tracked position to element center, with locator fallback if bounding box unavailable - Screenshot SHA-256 dedup: returns "unchanged" if identical to previous capture. Hash clears on navigation. - Network idle auto-wait (1.5s cap) after click - Read commands deprioritized in docs ("rarely needed — chain auto-observes") - 13 new tests covering all four features Co-Authored-By: Claude Opus 4.6 (1M context) --- .agents/skills/gstack-browse/SKILL.md | 78 +++++------ .agents/skills/gstack/SKILL.md | 78 +++++------ SKILL.md | 78 +++++------ browse/SKILL.md | 78 +++++------ browse/src/browser-manager.ts | 66 ++++++++- browse/src/commands.ts | 102 ++++++++------ browse/src/meta-commands.ts | 64 +++++++-- browse/src/mouse.ts | 55 ++++++++ browse/src/server.ts | 19 ++- browse/src/write-commands.ts | 44 ++++-- browse/test/commands.test.ts | 193 ++++++++++++++++++++++++++ scripts/gen-skill-docs.ts | 4 +- 12 files changed, 630 insertions(+), 229 deletions(-) create mode 100644 browse/src/mouse.ts diff --git a/.agents/skills/gstack-browse/SKILL.md b/.agents/skills/gstack-browse/SKILL.md index 45a59485..abbb50a9 100644 --- a/.agents/skills/gstack-browse/SKILL.md +++ b/.agents/skills/gstack-browse/SKILL.md @@ -406,44 +406,49 @@ 
Refs are invalidated on navigation — run `snapshot` again after `goto`. ## Full Command List +### Chain +| Command | Description | +|---------|-------------| +| `chain ` | Execute actions + auto-observe. Primary interface for all browser interactions. Returns action results + snapshot + page state. | + ### Navigation | Command | Description | |---------|-------------| -| `back` | History back | -| `forward` | History forward | -| `goto ` | Navigate to URL | -| `reload` | Reload page | +| `back` | History back (chain-only) | +| `forward` | History forward (chain-only) | +| `goto ` | Navigate to URL (chain-only) | +| `reload` | Reload page (chain-only) | | `url` | Print current URL | ### Reading | Command | Description | |---------|-------------| -| `accessibility` | Full ARIA tree | -| `forms` | Form fields as JSON | -| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given | -| `links` | All links as "text → href" | -| `text` | Cleaned page text | +| `accessibility` | Full ARIA tree (rarely needed) | +| `forms` | Form fields as JSON (rarely needed) | +| `html [selector]` | innerHTML of selector or full page HTML (rarely needed) | +| `links` | All links as "text → href" (rarely needed) | +| `text` | Cleaned page text (rarely needed — chain auto-observes) | ### Interaction | Command | Description | |---------|-------------| -| `click ` | Click element | -| `cookie =` | Set cookie on current page domain | -| `cookie-import ` | Import cookies from JSON file | -| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) | -| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. 
Optional text is sent as the prompt response | -| `dialog-dismiss` | Auto-dismiss next dialog | -| `fill ` | Fill input | -| `header :` | Set custom request header (colon-separated, sensitive values auto-redacted) | -| `hover ` | Hover element | -| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter | -| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector | -| `select ` | Select dropdown option by value, label, or visible text | -| `type ` | Type into focused element | -| `upload [file2...]` | Upload file(s) | -| `useragent ` | Set user agent | -| `viewport ` | Set viewport size | -| `wait ` | Wait for element, network idle, or page load (timeout: 15s) | +| `click ` | Click element with eased mouse movement (chain-only) | +| `cookie =` | Set cookie on current domain (chain-only) | +| `cookie-import ` | Import cookies from JSON file (chain-only) | +| `cookie-import-browser [browser] [--domain d]` | Import cookies from browser (chain-only) | +| `dialog-accept [text]` | Auto-accept dialogs (chain-only) | +| `dialog-dismiss` | Auto-dismiss dialogs (chain-only) | +| `fill ` | Fill input (chain-only) | +| `header :` | Set custom request header (chain-only) | +| `hover ` | Hover element with eased mouse movement (chain-only) | +| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter (chain-only) | +| `scroll [sel]` | Scroll element into view or to page bottom (chain-only) | +| `select ` | Select dropdown option (chain-only) | +| `type ` | Type into focused element (chain-only) | +| `upload [file2...]` | Upload file(s) (chain-only) | +| `useragent ` | Set user agent (chain-only) | +| `viewport ` | Set viewport size (chain-only) | +| `wait ` | Wait for element, network idle, or page load (chain-only) | ### Inspection | Command | Description | @@ -453,30 +458,25 
@@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `cookies` | All cookies as JSON | | `css ` | Computed CSS value | | `dialog [--clear]` | Dialog messages | -| `eval ` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) | +| `eval ` | Run JavaScript from file (path must be under /tmp or cwd) | | `is ` | State check (visible/hidden/enabled/disabled/checked/editable/focused) | -| `js ` | Run JavaScript expression and return result as string | +| `js ` | Run JavaScript expression | | `network [--clear]` | Network requests | | `perf` | Page load timings | -| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set to write localStorage | +| `storage [set k v]` | Read localStorage + sessionStorage, or set | ### Visual | Command | Description | |---------|-------------| | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | -| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. | -| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | +| `responsive [prefix]` | Screenshots at mobile, tablet, desktop breakpoints | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (deduped — returns "unchanged" if identical to previous) | ### Snapshot | Command | Description | |---------|-------------| -| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs | - -### Meta -| Command | Description | -|---------|-------------| -| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] | +| `snapshot [flags]` | Accessibility tree with @e refs. 
Flags: -i interactive, -c compact, -d N depth, -s sel scope, -D diff, -a annotated, -o path, -C cursor-interactive | ### Tabs | Command | Description | @@ -489,8 +489,8 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ### Server | Command | Description | |---------|-------------| -| `handoff [message]` | Open visible Chrome at current page for user takeover | +| `handoff [message]` | Open visible Chrome for user takeover | | `restart` | Restart server | -| `resume` | Re-snapshot after user takeover, return control to AI | +| `resume` | Re-snapshot after user takeover | | `status` | Health check | | `stop` | Shutdown server | diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md index 93128866..5ee0870f 100644 --- a/.agents/skills/gstack/SKILL.md +++ b/.agents/skills/gstack/SKILL.md @@ -534,44 +534,49 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ## Command Reference +### Chain +| Command | Description | +|---------|-------------| +| `chain ` | Execute actions + auto-observe. Primary interface for all browser interactions. Returns action results + snapshot + page state. 
| + ### Navigation | Command | Description | |---------|-------------| -| `back` | History back | -| `forward` | History forward | -| `goto ` | Navigate to URL | -| `reload` | Reload page | +| `back` | History back (chain-only) | +| `forward` | History forward (chain-only) | +| `goto ` | Navigate to URL (chain-only) | +| `reload` | Reload page (chain-only) | | `url` | Print current URL | ### Reading | Command | Description | |---------|-------------| -| `accessibility` | Full ARIA tree | -| `forms` | Form fields as JSON | -| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given | -| `links` | All links as "text → href" | -| `text` | Cleaned page text | +| `accessibility` | Full ARIA tree (rarely needed) | +| `forms` | Form fields as JSON (rarely needed) | +| `html [selector]` | innerHTML of selector or full page HTML (rarely needed) | +| `links` | All links as "text → href" (rarely needed) | +| `text` | Cleaned page text (rarely needed — chain auto-observes) | ### Interaction | Command | Description | |---------|-------------| -| `click ` | Click element | -| `cookie =` | Set cookie on current page domain | -| `cookie-import ` | Import cookies from JSON file | -| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) | -| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. 
Optional text is sent as the prompt response | -| `dialog-dismiss` | Auto-dismiss next dialog | -| `fill ` | Fill input | -| `header :` | Set custom request header (colon-separated, sensitive values auto-redacted) | -| `hover ` | Hover element | -| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter | -| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector | -| `select ` | Select dropdown option by value, label, or visible text | -| `type ` | Type into focused element | -| `upload [file2...]` | Upload file(s) | -| `useragent ` | Set user agent | -| `viewport ` | Set viewport size | -| `wait ` | Wait for element, network idle, or page load (timeout: 15s) | +| `click ` | Click element with eased mouse movement (chain-only) | +| `cookie =` | Set cookie on current domain (chain-only) | +| `cookie-import ` | Import cookies from JSON file (chain-only) | +| `cookie-import-browser [browser] [--domain d]` | Import cookies from browser (chain-only) | +| `dialog-accept [text]` | Auto-accept dialogs (chain-only) | +| `dialog-dismiss` | Auto-dismiss dialogs (chain-only) | +| `fill ` | Fill input (chain-only) | +| `header :` | Set custom request header (chain-only) | +| `hover ` | Hover element with eased mouse movement (chain-only) | +| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter (chain-only) | +| `scroll [sel]` | Scroll element into view or to page bottom (chain-only) | +| `select ` | Select dropdown option (chain-only) | +| `type ` | Type into focused element (chain-only) | +| `upload [file2...]` | Upload file(s) (chain-only) | +| `useragent ` | Set user agent (chain-only) | +| `viewport ` | Set viewport size (chain-only) | +| `wait ` | Wait for element, network idle, or page load (chain-only) | ### Inspection | Command | Description | @@ -581,30 +586,25 
@@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `cookies` | All cookies as JSON | | `css ` | Computed CSS value | | `dialog [--clear]` | Dialog messages | -| `eval ` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) | +| `eval ` | Run JavaScript from file (path must be under /tmp or cwd) | | `is ` | State check (visible/hidden/enabled/disabled/checked/editable/focused) | -| `js ` | Run JavaScript expression and return result as string | +| `js ` | Run JavaScript expression | | `network [--clear]` | Network requests | | `perf` | Page load timings | -| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set to write localStorage | +| `storage [set k v]` | Read localStorage + sessionStorage, or set | ### Visual | Command | Description | |---------|-------------| | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | -| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. | -| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | +| `responsive [prefix]` | Screenshots at mobile, tablet, desktop breakpoints | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (deduped — returns "unchanged" if identical to previous) | ### Snapshot | Command | Description | |---------|-------------| -| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs | - -### Meta -| Command | Description | -|---------|-------------| -| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] | +| `snapshot [flags]` | Accessibility tree with @e refs. 
Flags: -i interactive, -c compact, -d N depth, -s sel scope, -D diff, -a annotated, -o path, -C cursor-interactive | ### Tabs | Command | Description | @@ -617,9 +617,9 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ### Server | Command | Description | |---------|-------------| -| `handoff [message]` | Open visible Chrome at current page for user takeover | +| `handoff [message]` | Open visible Chrome for user takeover | | `restart` | Restart server | -| `resume` | Re-snapshot after user takeover, return control to AI | +| `resume` | Re-snapshot after user takeover | | `status` | Health check | | `stop` | Shutdown server | diff --git a/SKILL.md b/SKILL.md index d8e51bd1..fc2eb34a 100644 --- a/SKILL.md +++ b/SKILL.md @@ -540,44 +540,49 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ## Command Reference +### Chain +| Command | Description | +|---------|-------------| +| `chain ` | Execute actions + auto-observe. Primary interface for all browser interactions. Returns action results + snapshot + page state. 
| + ### Navigation | Command | Description | |---------|-------------| -| `back` | History back | -| `forward` | History forward | -| `goto ` | Navigate to URL | -| `reload` | Reload page | +| `back` | History back (chain-only) | +| `forward` | History forward (chain-only) | +| `goto ` | Navigate to URL (chain-only) | +| `reload` | Reload page (chain-only) | | `url` | Print current URL | ### Reading | Command | Description | |---------|-------------| -| `accessibility` | Full ARIA tree | -| `forms` | Form fields as JSON | -| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given | -| `links` | All links as "text → href" | -| `text` | Cleaned page text | +| `accessibility` | Full ARIA tree (rarely needed) | +| `forms` | Form fields as JSON (rarely needed) | +| `html [selector]` | innerHTML of selector or full page HTML (rarely needed) | +| `links` | All links as "text → href" (rarely needed) | +| `text` | Cleaned page text (rarely needed — chain auto-observes) | ### Interaction | Command | Description | |---------|-------------| -| `click ` | Click element | -| `cookie =` | Set cookie on current page domain | -| `cookie-import ` | Import cookies from JSON file | -| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) | -| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. 
Optional text is sent as the prompt response | -| `dialog-dismiss` | Auto-dismiss next dialog | -| `fill ` | Fill input | -| `header :` | Set custom request header (colon-separated, sensitive values auto-redacted) | -| `hover ` | Hover element | -| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter | -| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector | -| `select ` | Select dropdown option by value, label, or visible text | -| `type ` | Type into focused element | -| `upload [file2...]` | Upload file(s) | -| `useragent ` | Set user agent | -| `viewport ` | Set viewport size | -| `wait ` | Wait for element, network idle, or page load (timeout: 15s) | +| `click ` | Click element with eased mouse movement (chain-only) | +| `cookie =` | Set cookie on current domain (chain-only) | +| `cookie-import ` | Import cookies from JSON file (chain-only) | +| `cookie-import-browser [browser] [--domain d]` | Import cookies from browser (chain-only) | +| `dialog-accept [text]` | Auto-accept dialogs (chain-only) | +| `dialog-dismiss` | Auto-dismiss dialogs (chain-only) | +| `fill ` | Fill input (chain-only) | +| `header :` | Set custom request header (chain-only) | +| `hover ` | Hover element with eased mouse movement (chain-only) | +| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter (chain-only) | +| `scroll [sel]` | Scroll element into view or to page bottom (chain-only) | +| `select ` | Select dropdown option (chain-only) | +| `type ` | Type into focused element (chain-only) | +| `upload [file2...]` | Upload file(s) (chain-only) | +| `useragent ` | Set user agent (chain-only) | +| `viewport ` | Set viewport size (chain-only) | +| `wait ` | Wait for element, network idle, or page load (chain-only) | ### Inspection | Command | Description | @@ -587,30 +592,25 
@@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `cookies` | All cookies as JSON | | `css ` | Computed CSS value | | `dialog [--clear]` | Dialog messages | -| `eval ` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) | +| `eval ` | Run JavaScript from file (path must be under /tmp or cwd) | | `is ` | State check (visible/hidden/enabled/disabled/checked/editable/focused) | -| `js ` | Run JavaScript expression and return result as string | +| `js ` | Run JavaScript expression | | `network [--clear]` | Network requests | | `perf` | Page load timings | -| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set to write localStorage | +| `storage [set k v]` | Read localStorage + sessionStorage, or set | ### Visual | Command | Description | |---------|-------------| | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | -| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. | -| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | +| `responsive [prefix]` | Screenshots at mobile, tablet, desktop breakpoints | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (deduped — returns "unchanged" if identical to previous) | ### Snapshot | Command | Description | |---------|-------------| -| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs | - -### Meta -| Command | Description | -|---------|-------------| -| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] | +| `snapshot [flags]` | Accessibility tree with @e refs. 
Flags: -i interactive, -c compact, -d N depth, -s sel scope, -D diff, -a annotated, -o path, -C cursor-interactive | ### Tabs | Command | Description | @@ -623,9 +623,9 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ### Server | Command | Description | |---------|-------------| -| `handoff [message]` | Open visible Chrome at current page for user takeover | +| `handoff [message]` | Open visible Chrome for user takeover | | `restart` | Restart server | -| `resume` | Re-snapshot after user takeover, return control to AI | +| `resume` | Re-snapshot after user takeover | | `status` | Health check | | `stop` | Shutdown server | diff --git a/browse/SKILL.md b/browse/SKILL.md index e7ab6205..cff6bf02 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -412,44 +412,49 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ## Full Command List +### Chain +| Command | Description | +|---------|-------------| +| `chain ` | Execute actions + auto-observe. Primary interface for all browser interactions. Returns action results + snapshot + page state. 
| + ### Navigation | Command | Description | |---------|-------------| -| `back` | History back | -| `forward` | History forward | -| `goto ` | Navigate to URL | -| `reload` | Reload page | +| `back` | History back (chain-only) | +| `forward` | History forward (chain-only) | +| `goto ` | Navigate to URL (chain-only) | +| `reload` | Reload page (chain-only) | | `url` | Print current URL | ### Reading | Command | Description | |---------|-------------| -| `accessibility` | Full ARIA tree | -| `forms` | Form fields as JSON | -| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given | -| `links` | All links as "text → href" | -| `text` | Cleaned page text | +| `accessibility` | Full ARIA tree (rarely needed) | +| `forms` | Form fields as JSON (rarely needed) | +| `html [selector]` | innerHTML of selector or full page HTML (rarely needed) | +| `links` | All links as "text → href" (rarely needed) | +| `text` | Cleaned page text (rarely needed — chain auto-observes) | ### Interaction | Command | Description | |---------|-------------| -| `click ` | Click element | -| `cookie =` | Set cookie on current page domain | -| `cookie-import ` | Import cookies from JSON file | -| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) | -| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. 
Optional text is sent as the prompt response | -| `dialog-dismiss` | Auto-dismiss next dialog | -| `fill ` | Fill input | -| `header :` | Set custom request header (colon-separated, sensitive values auto-redacted) | -| `hover ` | Hover element | -| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter | -| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector | -| `select ` | Select dropdown option by value, label, or visible text | -| `type ` | Type into focused element | -| `upload [file2...]` | Upload file(s) | -| `useragent ` | Set user agent | -| `viewport ` | Set viewport size | -| `wait ` | Wait for element, network idle, or page load (timeout: 15s) | +| `click ` | Click element with eased mouse movement (chain-only) | +| `cookie =` | Set cookie on current domain (chain-only) | +| `cookie-import ` | Import cookies from JSON file (chain-only) | +| `cookie-import-browser [browser] [--domain d]` | Import cookies from browser (chain-only) | +| `dialog-accept [text]` | Auto-accept dialogs (chain-only) | +| `dialog-dismiss` | Auto-dismiss dialogs (chain-only) | +| `fill ` | Fill input (chain-only) | +| `header :` | Set custom request header (chain-only) | +| `hover ` | Hover element with eased mouse movement (chain-only) | +| `press ` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter (chain-only) | +| `scroll [sel]` | Scroll element into view or to page bottom (chain-only) | +| `select ` | Select dropdown option (chain-only) | +| `type ` | Type into focused element (chain-only) | +| `upload [file2...]` | Upload file(s) (chain-only) | +| `useragent ` | Set user agent (chain-only) | +| `viewport ` | Set viewport size (chain-only) | +| `wait ` | Wait for element, network idle, or page load (chain-only) | ### Inspection | Command | Description | @@ -459,30 +464,25 
@@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `cookies` | All cookies as JSON | | `css ` | Computed CSS value | | `dialog [--clear]` | Dialog messages | -| `eval ` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) | +| `eval ` | Run JavaScript from file (path must be under /tmp or cwd) | | `is ` | State check (visible/hidden/enabled/disabled/checked/editable/focused) | -| `js ` | Run JavaScript expression and return result as string | +| `js ` | Run JavaScript expression | | `network [--clear]` | Network requests | | `perf` | Page load timings | -| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set to write localStorage | +| `storage [set k v]` | Read localStorage + sessionStorage, or set | ### Visual | Command | Description | |---------|-------------| | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | -| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. | -| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | +| `responsive [prefix]` | Screenshots at mobile, tablet, desktop breakpoints | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (deduped — returns "unchanged" if identical to previous) | ### Snapshot | Command | Description | |---------|-------------| -| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs | - -### Meta -| Command | Description | -|---------|-------------| -| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] | +| `snapshot [flags]` | Accessibility tree with @e refs. 
Flags: -i interactive, -c compact, -d N depth, -s sel scope, -D diff, -a annotated, -o path, -C cursor-interactive | ### Tabs | Command | Description | @@ -495,8 +495,8 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ### Server | Command | Description | |---------|-------------| -| `handoff [message]` | Open visible Chrome at current page for user takeover | +| `handoff [message]` | Open visible Chrome for user takeover | | `restart` | Restart server | -| `resume` | Re-snapshot after user takeover, return control to AI | +| `resume` | Re-snapshot after user takeover | | `status` | Health check | | `stop` | Shutdown server | diff --git a/browse/src/browser-manager.ts b/browse/src/browser-manager.ts index 31a1f9de..fdc1d610 100644 --- a/browse/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -53,6 +53,13 @@ export class BrowserManager { // NOT cleared on navigation — it's a text baseline for diffing private lastSnapshot: string | null = null; + // ─── Mouse Position Tracking ──────────────────────────────── + private mouseX: number = 0; + private mouseY: number = 0; + + // ─── Screenshot Dedup ─────────────────────────────────────── + private screenshotHash: string | null = null; + // ─── Dialog Handling ────────────────────────────────────── private dialogAutoAccept: boolean = true; private dialogPromptText: string | null = null; @@ -550,13 +557,68 @@ export class BrowserManager { return null; } + // ─── Mouse Position ────────────────────────────────────── + setMousePosition(x: number, y: number) { + this.mouseX = x; + this.mouseY = y; + } + + getMousePosition(): { x: number; y: number } { + return { x: this.mouseX, y: this.mouseY }; + } + + // ─── Screenshot Dedup ────────────────────────────────────── + setScreenshotHash(hash: string | null) { + this.screenshotHash = hash; + } + + getScreenshotHash(): string | null { + return this.screenshotHash; + } + + // ─── Page State (for chain observation) ───────────────────── + 
async getPageState(): Promise<{ + url: string; + title: string; + viewport: { width: number; height: number } | null; + focusedElement: string | null; + dialogState: string; + }> { + const page = this.getPage(); + const url = page.url(); + const title = await page.title().catch(() => ''); + const viewport = page.viewportSize(); + + // Get focused element description + let focusedElement: string | null = null; + try { + focusedElement = await page.evaluate(() => { + const el = document.activeElement; + if (!el || el === document.body) return null; + const tag = el.tagName.toLowerCase(); + const role = el.getAttribute('role') || ''; + const name = el.getAttribute('aria-label') || el.getAttribute('name') || el.getAttribute('placeholder') || ''; + const parts = [tag]; + if (role) parts.push(`role=${role}`); + if (name) parts.push(`"${name}"`); + return parts.join(' '); + }); + } catch {} + + // Dialog state + const dialogState = this.dialogAutoAccept ? 'auto-accept' : 'auto-dismiss'; + + return { url, title, viewport, focusedElement, dialogState }; + } + // ─── Console/Network/Dialog/Ref Wiring ──────────────────── private wirePageEvents(page: Page) { - // Clear ref map on navigation — refs point to stale elements after page change - // (lastSnapshot is NOT cleared — it's a text baseline for diffing) + // Clear ref map + screenshot hash on navigation — refs point to stale elements, + // screenshot may differ. (lastSnapshot is NOT cleared — it's a text baseline for diffing) page.on('framenavigated', (frame) => { if (frame === page.mainFrame()) { this.clearRefs(); + this.screenshotHash = null; } }); diff --git a/browse/src/commands.ts b/browse/src/commands.ts index c3509af1..0d09ea37 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -8,8 +8,15 @@ * ──▶ skill-check.ts (health reporting) * * Zero side effects. Safe to import from build scripts and tests. 
+ * + * Architecture: + * - CHAIN_ONLY_COMMANDS: write/interaction commands, only callable via chain + * - READ_COMMANDS: observation commands, callable standalone (but rarely needed) + * - META_COMMANDS: tabs, server control, visual, snapshot — callable standalone + * - chain is the primary interface: executes actions + auto-observes (snapshot + page state) */ +/** Read commands — callable standalone, but rarely needed since chain auto-observes */ export const READ_COMMANDS = new Set([ 'text', 'html', 'links', 'forms', 'accessibility', 'js', 'eval', 'css', 'attrs', @@ -17,13 +24,20 @@ export const READ_COMMANDS = new Set([ 'dialog', 'is', ]); -export const WRITE_COMMANDS = new Set([ +/** + * Write/interaction commands — ONLY callable inside chain. + * Standalone dispatch is rejected with guidance to use chain. + */ +export const CHAIN_ONLY_COMMANDS = new Set([ 'goto', 'back', 'forward', 'reload', 'click', 'fill', 'select', 'hover', 'type', 'press', 'scroll', 'wait', 'viewport', 'cookie', 'cookie-import', 'cookie-import-browser', 'header', 'useragent', 'upload', 'dialog-accept', 'dialog-dismiss', ]); +/** @deprecated Use CHAIN_ONLY_COMMANDS. Kept for backward compat in tests. 
*/ +export const WRITE_COMMANDS = CHAIN_ONLY_COMMANDS; + export const META_COMMANDS = new Set([ 'tabs', 'tab', 'newtab', 'closetab', 'status', 'stop', 'restart', @@ -33,24 +47,26 @@ export const META_COMMANDS = new Set([ 'handoff', 'resume', ]); -export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]); +export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...CHAIN_ONLY_COMMANDS, ...META_COMMANDS]); export const COMMAND_DESCRIPTIONS: Record = { - // Navigation - 'goto': { category: 'Navigation', description: 'Navigate to URL', usage: 'goto ' }, - 'back': { category: 'Navigation', description: 'History back' }, - 'forward': { category: 'Navigation', description: 'History forward' }, - 'reload': { category: 'Navigation', description: 'Reload page' }, + // Chain — primary interface + 'chain': { category: 'Chain', description: 'Execute actions + auto-observe. Primary interface for all browser interactions. Returns action results + snapshot + page state.', usage: 'chain ' }, + // Navigation (chain-only) + 'goto': { category: 'Navigation', description: 'Navigate to URL (chain-only)', usage: 'goto ' }, + 'back': { category: 'Navigation', description: 'History back (chain-only)' }, + 'forward': { category: 'Navigation', description: 'History forward (chain-only)' }, + 'reload': { category: 'Navigation', description: 'Reload page (chain-only)' }, 'url': { category: 'Navigation', description: 'Print current URL' }, - // Reading - 'text': { category: 'Reading', description: 'Cleaned page text' }, - 'html': { category: 'Reading', description: 'innerHTML of selector (throws if not found), or full page HTML if no selector given', usage: 'html [selector]' }, - 'links': { category: 'Reading', description: 'All links as "text → href"' }, - 'forms': { category: 'Reading', description: 'Form fields as JSON' }, - 'accessibility': { category: 'Reading', description: 'Full ARIA tree' }, - // Inspection - 'js': { category: 'Inspection', description: 
'Run JavaScript expression and return result as string', usage: 'js ' }, - 'eval': { category: 'Inspection', description: 'Run JavaScript from file and return result as string (path must be under /tmp or cwd)', usage: 'eval ' }, + // Reading (standalone — rarely needed, chain auto-observes) + 'text': { category: 'Reading', description: 'Cleaned page text (rarely needed — chain auto-observes)' }, + 'html': { category: 'Reading', description: 'innerHTML of selector or full page HTML (rarely needed)', usage: 'html [selector]' }, + 'links': { category: 'Reading', description: 'All links as "text → href" (rarely needed)' }, + 'forms': { category: 'Reading', description: 'Form fields as JSON (rarely needed)' }, + 'accessibility': { category: 'Reading', description: 'Full ARIA tree (rarely needed)' }, + // Inspection (standalone — rarely needed) + 'js': { category: 'Inspection', description: 'Run JavaScript expression', usage: 'js ' }, + 'eval': { category: 'Inspection', description: 'Run JavaScript from file (path must be under /tmp or cwd)', usage: 'eval ' }, 'css': { category: 'Inspection', description: 'Computed CSS value', usage: 'css ' }, 'attrs': { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs ' }, 'is': { category: 'Inspection', description: 'State check (visible/hidden/enabled/disabled/checked/editable/focused)', usage: 'is ' }, @@ -58,31 +74,33 @@ export const COMMAND_DESCRIPTIONS: Record to write localStorage', usage: 'storage [set k v]' }, + 'storage': { category: 'Inspection', description: 'Read localStorage + sessionStorage, or set ', usage: 'storage [set k v]' }, 'perf': { category: 'Inspection', description: 'Page load timings' }, - // Interaction - 'click': { category: 'Interaction', description: 'Click element', usage: 'click ' }, - 'fill': { category: 'Interaction', description: 'Fill input', usage: 'fill ' }, - 'select': { category: 'Interaction', description: 'Select dropdown option by value, label, or visible text', 
usage: 'select ' }, - 'hover': { category: 'Interaction', description: 'Hover element', usage: 'hover ' }, - 'type': { category: 'Interaction', description: 'Type into focused element', usage: 'type ' }, - 'press': { category: 'Interaction', description: 'Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter', usage: 'press ' }, - 'scroll': { category: 'Interaction', description: 'Scroll element into view, or scroll to page bottom if no selector', usage: 'scroll [sel]' }, - 'wait': { category: 'Interaction', description: 'Wait for element, network idle, or page load (timeout: 15s)', usage: 'wait ' }, - 'upload': { category: 'Interaction', description: 'Upload file(s)', usage: 'upload [file2...]' }, - 'viewport':{ category: 'Interaction', description: 'Set viewport size', usage: 'viewport ' }, - 'cookie': { category: 'Interaction', description: 'Set cookie on current page domain', usage: 'cookie =' }, - 'cookie-import': { category: 'Interaction', description: 'Import cookies from JSON file', usage: 'cookie-import ' }, - 'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import)', usage: 'cookie-import-browser [browser] [--domain d]' }, - 'header': { category: 'Interaction', description: 'Set custom request header (colon-separated, sensitive values auto-redacted)', usage: 'header :' }, - 'useragent': { category: 'Interaction', description: 'Set user agent', usage: 'useragent ' }, - 'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt. 
Optional text is sent as the prompt response', usage: 'dialog-accept [text]' }, - 'dialog-dismiss': { category: 'Interaction', description: 'Auto-dismiss next dialog' }, + // Interaction (chain-only) + 'click': { category: 'Interaction', description: 'Click element with eased mouse movement (chain-only)', usage: 'click ' }, + 'fill': { category: 'Interaction', description: 'Fill input (chain-only)', usage: 'fill ' }, + 'select': { category: 'Interaction', description: 'Select dropdown option (chain-only)', usage: 'select ' }, + 'hover': { category: 'Interaction', description: 'Hover element with eased mouse movement (chain-only)', usage: 'hover ' }, + 'type': { category: 'Interaction', description: 'Type into focused element (chain-only)', usage: 'type ' }, + 'press': { category: 'Interaction', description: 'Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter (chain-only)', usage: 'press ' }, + 'scroll': { category: 'Interaction', description: 'Scroll element into view or to page bottom (chain-only)', usage: 'scroll [sel]' }, + 'wait': { category: 'Interaction', description: 'Wait for element, network idle, or page load (chain-only)', usage: 'wait ' }, + 'upload': { category: 'Interaction', description: 'Upload file(s) (chain-only)', usage: 'upload [file2...]' }, + 'viewport':{ category: 'Interaction', description: 'Set viewport size (chain-only)', usage: 'viewport ' }, + 'cookie': { category: 'Interaction', description: 'Set cookie on current domain (chain-only)', usage: 'cookie =' }, + 'cookie-import': { category: 'Interaction', description: 'Import cookies from JSON file (chain-only)', usage: 'cookie-import ' }, + 'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from browser (chain-only)', usage: 'cookie-import-browser [browser] [--domain d]' }, + 'header': { category: 'Interaction', description: 'Set custom request header (chain-only)', usage: 
'header :' }, + 'useragent': { category: 'Interaction', description: 'Set user agent (chain-only)', usage: 'useragent ' }, + 'dialog-accept': { category: 'Interaction', description: 'Auto-accept dialogs (chain-only)', usage: 'dialog-accept [text]' }, + 'dialog-dismiss': { category: 'Interaction', description: 'Auto-dismiss dialogs (chain-only)' }, // Visual - 'screenshot': { category: 'Visual', description: 'Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport)', usage: 'screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]' }, + 'screenshot': { category: 'Visual', description: 'Save screenshot (deduped — returns "unchanged" if identical to previous)', usage: 'screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]' }, 'pdf': { category: 'Visual', description: 'Save as PDF', usage: 'pdf [path]' }, - 'responsive': { category: 'Visual', description: 'Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc.', usage: 'responsive [prefix]' }, + 'responsive': { category: 'Visual', description: 'Screenshots at mobile, tablet, desktop breakpoints', usage: 'responsive [prefix]' }, 'diff': { category: 'Visual', description: 'Text diff between pages', usage: 'diff ' }, + // Snapshot + 'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @e refs. 
Flags: -i interactive, -c compact, -d N depth, -s sel scope, -D diff, -a annotated, -o path, -C cursor-interactive', usage: 'snapshot [flags]' }, // Tabs 'tabs': { category: 'Tabs', description: 'List open tabs' }, 'tab': { category: 'Tabs', description: 'Switch to tab', usage: 'tab ' }, @@ -92,16 +110,12 @@ export const COMMAND_DESCRIPTIONS: Record'); let commands: string[][]; try { @@ -204,17 +222,45 @@ export async function handleMetaCommand( const [name, ...cmdArgs] = cmd; try { let result: string; - if (WRITE_COMMANDS.has(name)) result = await handleWriteCommand(name, cmdArgs, bm); + if (CHAIN_ONLY_COMMANDS.has(name)) result = await handleWriteCommand(name, cmdArgs, bm); else if (READ_COMMANDS.has(name)) result = await handleReadCommand(name, cmdArgs, bm); else if (META_COMMANDS.has(name)) result = await handleMetaCommand(name, cmdArgs, bm, shutdown); else throw new Error(`Unknown command: ${name}`); results.push(`[${name}] ${result}`); } catch (err: any) { results.push(`[${name}] ERROR: ${err.message}`); + break; // Stop on first error } } - return results.join('\n\n'); + // Wait for network to settle after all actions + try { + const page = bm.getPage(); + await page.waitForLoadState('networkidle', { timeout: 2000 }).catch(() => {}); + } catch {} + + // Auto-observation: snapshot + page state + let snapshot: string; + try { + snapshot = await handleSnapshot(['-i'], bm); + } catch { + snapshot = '(snapshot failed)'; + } + + const state = await bm.getPageState(); + const observation = [ + '', + '── observation ──', + `URL: ${state.url}`, + `Title: ${state.title}`, + `Viewport: ${state.viewport ? 
`${state.viewport.width}x${state.viewport.height}` : 'unknown'}`, + `Focus: ${state.focusedElement || 'none'}`, + `Dialogs: ${state.dialogState}`, + '', + snapshot, + ].join('\n'); + + return results.join('\n') + observation; } // ─── Diff ────────────────────────────────────────── diff --git a/browse/src/mouse.ts b/browse/src/mouse.ts new file mode 100644 index 00000000..c259c049 --- /dev/null +++ b/browse/src/mouse.ts @@ -0,0 +1,55 @@ +/** + * Mouse movement with cubic easing — simulates human-like cursor movement. + * + * Tracks last known mouse position on BrowserManager. + * Eases from current position to target with step count proportional to distance. + */ + +import type { Page } from 'playwright'; +import type { BrowserManager } from './browser-manager'; + +/** + * Cubic ease-in-out: slow start, fast middle, slow end. + * t ∈ [0, 1] → output ∈ [0, 1] + */ +export function easeInOutCubic(t: number): number { + return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2; +} + +/** + * Move mouse from current tracked position to (toX, toY) with eased interpolation. + * + * Steps: 3–40, proportional to distance (1 step per 20px). + * Duration: 30–300ms total, proportional to distance (0.5ms per px). 
+ */ +export async function moveMouseEased( + page: Page, + bm: BrowserManager, + toX: number, + toY: number +): Promise<void> { + const { x: fromX, y: fromY } = bm.getMousePosition(); + const distance = Math.hypot(toX - fromX, toY - fromY); + + // Skip movement if already at target (or very close) + if (distance < 2) { + bm.setMousePosition(toX, toY); + return; + } + + const steps = Math.max(3, Math.min(40, Math.ceil(distance / 20))); + const duration = Math.max(30, Math.min(300, distance * 0.5)); + const stepDelay = duration / steps; + + for (let i = 1; i <= steps; i++) { + const t = easeInOutCubic(i / steps); + const x = fromX + (toX - fromX) * t; + const y = fromY + (toY - fromY) * t; + await page.mouse.move(x, y); + if (i < steps) { + await new Promise(r => setTimeout(r, stepDelay)); + } + } + + bm.setMousePosition(toX, toY); +} diff --git a/browse/src/server.ts b/browse/src/server.ts index 82af28bd..c225a1da 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -51,8 +51,9 @@ function generateHelpText(): string { } const categoryOrder = [ - 'Navigation', 'Reading', 'Interaction', 'Inspection', - 'Visual', 'Snapshot', 'Meta', 'Tabs', 'Server', + 'Chain', 'Navigation', 'Interaction', + 'Reading', 'Inspection', + 'Visual', 'Snapshot', 'Tabs', 'Server', ]; const lines = ['gstack browse — headless browser for AI agents', '', 'Commands:']; @@ -154,8 +155,8 @@ const idleCheckInterval = setInterval(() => { }, 60_000); // ─── Command Sets (from commands.ts — single source of truth) ─── -import { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS } from './commands'; -export { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS }; +import { READ_COMMANDS, CHAIN_ONLY_COMMANDS, META_COMMANDS } from './commands'; +export { READ_COMMANDS, CHAIN_ONLY_COMMANDS, META_COMMANDS }; // ─── Server ──────────────────────────────────────────────────── const browserManager = new BrowserManager(); @@ -229,8 +230,14 @@ async function handleCommand(body: any): Promise<Response> { if 
(READ_COMMANDS.has(command)) { result = await handleReadCommand(command, args, browserManager); - } else if (WRITE_COMMANDS.has(command)) { - result = await handleWriteCommand(command, args, browserManager); + } else if (CHAIN_ONLY_COMMANDS.has(command)) { + return new Response(JSON.stringify({ + error: `'${command}' is chain-only. Use: browse chain '[["${command}"${args.length > 0 ? ',"' + args.join('","') + '"' : ''}]]'`, + hint: 'chain is the primary interface — it executes actions and auto-observes (snapshot + page state).', + }), { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }); } else if (META_COMMANDS.has(command)) { result = await handleMetaCommand(command, args, browserManager, shutdown); } else if (command === 'help') { diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index 1bf37eb5..6f6534de 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -8,6 +8,7 @@ import type { BrowserManager } from './browser-manager'; import { findInstalledBrowsers, importCookies } from './cookie-import-browser'; import { validateNavigationUrl } from './url-validation'; +import { moveMouseEased } from './mouse'; import * as fs from 'fs'; import * as path from 'path'; import { TEMP_DIR, isPathWithin } from './platform'; @@ -70,10 +71,22 @@ export async function handleWriteCommand( const resolved = await bm.resolveRef(selector); try { - if ('locator' in resolved) { - await resolved.locator.click({ timeout: 5000 }); + // Try eased mouse movement → click at coordinates + const locator = 'locator' in resolved ? 
resolved.locator : page.locator(resolved.selector); + await locator.scrollIntoViewIfNeeded({ timeout: 5000 }); + const box = await locator.boundingBox({ timeout: 2000 }); + if (box) { + const centerX = box.x + box.width / 2; + const centerY = box.y + box.height / 2; + await moveMouseEased(page, bm, centerX, centerY); + await page.mouse.click(centerX, centerY); } else { - await page.click(resolved.selector, { timeout: 5000 }); + // Fallback: element has no bbox (hidden/zero-size) — use locator click + if ('locator' in resolved) { + await resolved.locator.click({ timeout: 5000 }); + } else { + await page.click(resolved.selector, { timeout: 5000 }); + } } } catch (err: any) { // Enhanced error guidance: clicking