From c0fec14d7afcc4560ec1947e6bd7a0197e462ff2 Mon Sep 17 00:00:00 2001 From: Leo Date: Wed, 6 May 2026 19:52:56 -0700 Subject: [PATCH 1/5] test(browser): tighten signup email test failure mode and prompt Drop the @pytest.mark.flaky(reruns=3) wrapper on test_signup_email_extraction so a failed run fails the test instead of silently retrying up to 3 more times. Rework the agent prompt to: - accept either signup or login (existing accounts no longer fail), - forbid Google/GitHub/SSO and force the plain email flow, - accept any logged-in landing page as success (the 'One more second' interstitial is no longer always shown, which was causing validator rejections). Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/browser/test_tools.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/browser/test_tools.py b/tests/browser/test_tools.py index cc8d532cc..567fb54b8 100644 --- a/tests/browser/test_tools.py +++ b/tests/browser/test_tools.py @@ -52,15 +52,21 @@ def test_tool_execution_in_session(persona: NottePersona, action: EmailReadActio assert len(out.data.structured.get().emails) > 0 -@pytest.mark.flaky(reruns=3, reruns_delay=5) def test_signup_email_extraction(persona: NottePersona): with notte.Session(headless=True) as session: agent = notte.Agent(session=session, persona=persona, max_steps=15) resp = agent.run( task=( - "Go to console.notte.cc, login with the email signup email, verify the account. " - "Stop after the account is verified, i.e as soon as your are on the 'One more second' page." - "CRITICAL: do not fill the in the onboarding form, just stop after the account is verified" + "Go to console.notte.cc and authenticate with the persona's email. " + "If the account does not exist yet, sign up; if it already exists, log in. Either path is fine. " + "CRITICAL: never use Google sign-in, GitHub sign-in, or any other social/SSO option — " + "always pick the plain email flow (email + password, or email magic link). 
" + "When a verification or magic-link email is required, check the persona's inbox and open " + "the link from that email to complete authentication. " + "Success = you are authenticated and have landed inside the console (any logged-in page is " + "acceptable, e.g. the 'One more second' interstitial, the personal/agent console, or the " + "dashboard). Stop as soon as you reach any logged-in page. " + "CRITICAL: do not fill in any onboarding form — stop immediately once authenticated." ), url="https://console.notte.cc", ) From f7bb8861f5b89353c57032dc5301daef40392836 Mon Sep 17 00:00:00 2001 From: Leo Date: Wed, 6 May 2026 19:59:48 -0700 Subject: [PATCH 2/5] test(browser): cap signup email test at 90s per attempt, restore flaky Re-add @pytest.mark.flaky(reruns=3, reruns_delay=5) so transient agent flakes don't fail the suite, but pin a 90s @pytest.mark.timeout per attempt instead of relying on the global 300s. Each retry now fails fast on a hang rather than burning the full 5-minute budget. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/browser/test_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/browser/test_tools.py b/tests/browser/test_tools.py index 567fb54b8..2e368a235 100644 --- a/tests/browser/test_tools.py +++ b/tests/browser/test_tools.py @@ -52,6 +52,8 @@ def test_tool_execution_in_session(persona: NottePersona, action: EmailReadActio assert len(out.data.structured.get().emails) > 0 +@pytest.mark.timeout(90) +@pytest.mark.flaky(reruns=3, reruns_delay=5) def test_signup_email_extraction(persona: NottePersona): with notte.Session(headless=True) as session: agent = notte.Agent(session=session, persona=persona, max_steps=15) From 74d7376e51de06d83dc96e7e4e89b7cca814624d Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 7 May 2026 10:29:32 -0700 Subject: [PATCH 3/5] test(browser): raise signup email test timeout to 120s Bump @pytest.mark.timeout from 90s to 120s. 
A clean run takes ~50s locally, but variance from LLM rate-limit backoff (vertex_ai 429s under concurrent xdist load) can easily double that. 120s gives a safety margin while staying well below the global 300s, and caps worst-case wall time across all attempts (1 run + 3 reruns = 4 x 120s) at ~8 minutes. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/browser/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/browser/test_tools.py b/tests/browser/test_tools.py index 2e368a235..647e2801f 100644 --- a/tests/browser/test_tools.py +++ b/tests/browser/test_tools.py @@ -52,7 +52,7 @@ def test_tool_execution_in_session(persona: NottePersona, action: EmailReadActio assert len(out.data.structured.get().emails) > 0 -@pytest.mark.timeout(90) +@pytest.mark.timeout(120) @pytest.mark.flaky(reruns=3, reruns_delay=5) def test_signup_email_extraction(persona: NottePersona): with notte.Session(headless=True) as session: From 314faf93326de1d8b8526bf3156cc50f88378253 Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 7 May 2026 11:09:29 -0700 Subject: [PATCH 4/5] test(browser): harden anti-Google guardrails in signup email prompt Replace the soft 'never use Google sign-in' instruction with an ABSOLUTE RULE that enumerates the forbidden button labels (Google, GitHub, SSO, Microsoft, Apple, social) and instructs the agent to read each button's text BEFORE clicking. Also whitelist the acceptable email-flow button labels and add an explicit recovery clause if a forbidden button is clicked by accident. Why: even when the agent's stated intent was 'click Send magic link', the action mapper was sometimes resolving to id=B2 = 'Use Google', landing on a Google OAuth flow the agent couldn't escape. The soft prompt let it slip through; the explicit forbid-list does not. 9/9 local runs pass with the new prompt vs 1/3 previously. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/browser/test_tools.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/browser/test_tools.py b/tests/browser/test_tools.py index 647e2801f..345e3642d 100644 --- a/tests/browser/test_tools.py +++ b/tests/browser/test_tools.py @@ -61,14 +61,23 @@ def test_signup_email_extraction(persona: NottePersona): task=( "Go to console.notte.cc and authenticate with the persona's email. " "If the account does not exist yet, sign up; if it already exists, log in. Either path is fine. " - "CRITICAL: never use Google sign-in, GitHub sign-in, or any other social/SSO option — " - "always pick the plain email flow (email + password, or email magic link). " + "ABSOLUTE RULE — NEVER CLICK any of the following buttons under ANY circumstances: " + "'Use Google', 'Continue with Google', 'Sign in with Google', 'Sign up with Google', " + "'Use GitHub', 'Continue with GitHub', 'Sign in with GitHub', 'Sign up with GitHub', " + "or any button whose visible label contains the words 'Google', 'GitHub', 'SSO', " + "'Microsoft', 'Apple', or 'social'. Before EVERY click, read the button's exact text " + "label and verify it is NOT one of the forbidden labels above. If you click one of " + "these by accident, the task has FAILED — you must immediately navigate back to " + "console.notte.cc and start over. " + "The ONLY acceptable authentication path is the plain email flow: enter the email " + "address into the email input field, then click a button labeled exactly 'Send magic link', " + "'Continue with email', 'Sign in with email', or 'Submit' (or a similar email-only button). " "When a verification or magic-link email is required, check the persona's inbox and open " "the link from that email to complete authentication. " "Success = you are authenticated and have landed inside the console (any logged-in page is " "acceptable, e.g. 
the 'One more second' interstitial, the personal/agent console, or the " "dashboard). Stop as soon as you reach any logged-in page. " - "CRITICAL: do not fill in any onboarding form — stop immediately once authenticated." + "Do not fill in any onboarding form — stop immediately once authenticated." ), url="https://console.notte.cc", ) From 9b8dc756b740bb911dd8746dd296eca1300b9198 Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 7 May 2026 11:36:12 -0700 Subject: [PATCH 5/5] test(browser): capture signup email test logs to file artifact MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a dual loguru + stdlib FileHandler in test_signup_email_extraction that writes to /tmp/signup-debug/signup-{ts}-{pid}.log, and an actions/upload-artifact step that uploads that directory on every CI run (if: always). Why: in CI the test crashes its xdist worker at the pytest-timeout ('node down: Not properly terminated'), and the worker's captured stdout dies with it — we have zero visibility into what the agent or litellm were doing. Writing directly to a file (with enqueue=False, immediate flush) means the log survives a SIGKILL. Captures both notte's loguru output and litellm's stdlib logging (rate-limit / 429 messages, etc.), so we can finally tell whether CI crashes are 429 storms, action misclicks, or something else. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/test-cicd.yml | 9 ++++ tests/browser/test_tools.py | 79 ++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/.github/workflows/test-cicd.yml b/.github/workflows/test-cicd.yml index 419c3e549..6263cae6e 100644 --- a/.github/workflows/test-cicd.yml +++ b/.github/workflows/test-cicd.yml @@ -135,6 +135,15 @@ jobs: set -o pipefail uv run pytest -n logical tests --ignore=tests/integration/test_webvoyager_resolution.py --ignore=tests/integration/test_e2e.py --ignore=tests/examples/test_examples.py --ignore=tests/examples/test_readme.py --durations=10 --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=packages | tee pytest-coverage.txt + - name: Upload signup email test debug logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: signup-debug-logs + path: /tmp/signup-debug/ + retention-days: 7 + if-no-files-found: ignore + - name: Pytest coverage comment if: ${{ always() && github.ref != 'refs/heads/main' }} uses: MishaKav/pytest-coverage-comment@main diff --git a/tests/browser/test_tools.py b/tests/browser/test_tools.py index 345e3642d..2b13d0e09 100644 --- a/tests/browser/test_tools.py +++ b/tests/browser/test_tools.py @@ -1,4 +1,10 @@ +import logging +import os +import time +from pathlib import Path + import pytest +from loguru import logger from notte_browser.errors import NoToolProvidedError from notte_browser.tools.base import EmailReadAction, PersonaTool from notte_sdk import NotteClient @@ -55,30 +61,49 @@ def test_tool_execution_in_session(persona: NottePersona, action: EmailReadActio @pytest.mark.timeout(120) @pytest.mark.flaky(reruns=3, reruns_delay=5) def test_signup_email_extraction(persona: NottePersona): - with notte.Session(headless=True) as session: - agent = notte.Agent(session=session, persona=persona, max_steps=15) - resp = agent.run( - task=( - "Go to console.notte.cc and authenticate with the persona's email. 
" - "If the account does not exist yet, sign up; if it already exists, log in. Either path is fine. " - "ABSOLUTE RULE — NEVER CLICK any of the following buttons under ANY circumstances: " - "'Use Google', 'Continue with Google', 'Sign in with Google', 'Sign up with Google', " - "'Use GitHub', 'Continue with GitHub', 'Sign in with GitHub', 'Sign up with GitHub', " - "or any button whose visible label contains the words 'Google', 'GitHub', 'SSO', " - "'Microsoft', 'Apple', or 'social'. Before EVERY click, read the button's exact text " - "label and verify it is NOT one of the forbidden labels above. If you click one of " - "these by accident, the task has FAILED — you must immediately navigate back to " - "console.notte.cc and start over. " - "The ONLY acceptable authentication path is the plain email flow: enter the email " - "address into the email input field, then click a button labeled exactly 'Send magic link', " - "'Continue with email', 'Sign in with email', or 'Submit' (or a similar email-only button). " - "When a verification or magic-link email is required, check the persona's inbox and open " - "the link from that email to complete authentication. " - "Success = you are authenticated and have landed inside the console (any logged-in page is " - "acceptable, e.g. the 'One more second' interstitial, the personal/agent console, or the " - "dashboard). Stop as soon as you reach any logged-in page. " - "Do not fill in any onboarding form — stop immediately once authenticated." 
- ), - url="https://console.notte.cc", - ) - assert resp.success, f"Failed to run agent: {resp.answer}" + log_dir = Path(os.getenv("SIGNUP_DEBUG_LOG_DIR", "/tmp/signup-debug")) + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"signup-{int(time.time() * 1000)}-{os.getpid()}.log" + loguru_sink_id = logger.add(str(log_path), level="DEBUG", enqueue=False, backtrace=True, diagnose=True) + stdlib_handler = logging.FileHandler(str(log_path)) + stdlib_handler.setLevel(logging.DEBUG) + stdlib_handler.setFormatter(logging.Formatter("%(asctime)s [stdlib %(name)s %(levelname)s] %(message)s")) + logging.getLogger().addHandler(stdlib_handler) + logger.info(f"=== signup_email_extraction start | log_path={log_path} | pid={os.getpid()} ===") + try: + with notte.Session(headless=True) as session: + agent = notte.Agent(session=session, persona=persona, max_steps=15) + resp = agent.run( + task=( + "Go to console.notte.cc and authenticate with the persona's email. " + "If the account does not exist yet, sign up; if it already exists, log in. Either path is fine. " + "ABSOLUTE RULE — NEVER CLICK any of the following buttons under ANY circumstances: " + "'Use Google', 'Continue with Google', 'Sign in with Google', 'Sign up with Google', " + "'Use GitHub', 'Continue with GitHub', 'Sign in with GitHub', 'Sign up with GitHub', " + "or any button whose visible label contains the words 'Google', 'GitHub', 'SSO', " + "'Microsoft', 'Apple', or 'social'. Before EVERY click, read the button's exact text " + "label and verify it is NOT one of the forbidden labels above. If you click one of " + "these by accident, the task has FAILED — you must immediately navigate back to " + "console.notte.cc and start over. 
" + "The ONLY acceptable authentication path is the plain email flow: enter the email " + "address into the email input field, then click a button labeled exactly 'Send magic link', " + "'Continue with email', 'Sign in with email', or 'Submit' (or a similar email-only button). " + "When a verification or magic-link email is required, check the persona's inbox and open " + "the link from that email to complete authentication. " + "Success = you are authenticated and have landed inside the console (any logged-in page is " + "acceptable, e.g. the 'One more second' interstitial, the personal/agent console, or the " + "dashboard). Stop as soon as you reach any logged-in page. " + "Do not fill in any onboarding form — stop immediately once authenticated." + ), + url="https://console.notte.cc", + ) + logger.info(f"=== agent.run finished | success={resp.success} | answer={resp.answer!r} ===") + assert resp.success, f"Failed to run agent: {resp.answer}" + except BaseException as exc: + logger.exception(f"=== signup_email_extraction failed: {type(exc).__name__}: {exc} ===") + raise + finally: + logger.info("=== signup_email_extraction end ===") + logger.remove(loguru_sink_id) + logging.getLogger().removeHandler(stdlib_handler) + stdlib_handler.close()