From 56ee7c2821507f08db08a3938144e91d37675d1c Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Tue, 23 Jun 2026 10:43:06 +0200
Subject: [PATCH 1/9] docs: design spec for --test filter flag

---
 .../specs/2026-06-23-filter-tests-design.md   | 100 ++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-06-23-filter-tests-design.md
diff --git a/docs/superpowers/specs/2026-06-23-filter-tests-design.md b/docs/superpowers/specs/2026-06-23-filter-tests-design.md
new file mode 100644
index 0000000..0b3abaa
--- /dev/null
+++ b/docs/superpowers/specs/2026-06-23-filter-tests-design.md
@@ -0,0 +1,100 @@
+# Design: `--test` filter for twd-cli
+
+**Date:** 2026-06-23
+**Branch:** `feat/filter-tests`
+**Status:** Approved
+
+## Goal
+
+Add a `--test` CLI flag to `twd-cli` that runs only a subset of registered TWD
+tests, matched by name. This enables quick, targeted execution from the CLI for
+easier debugging instead of always running the full suite.
+
+## Behavior
+
+- `--test "<value>"` is **repeatable**. Passing multiple flags means **OR**: a
+  test runs if it matches *any* of the values.
+- Matching is **case-insensitive substring** against the test's **full path**
+  string, `"Suite > Subsuite > test name"`.
+  - `--test "shows error"` runs the test whose name contains "shows error".
+  - `--test "Login"` runs every test under `describe("Login", …)`, because
+    "Login" appears in those tests' full paths. Describe/suite filtering comes
+    for free from this one mechanism — there is no separate flag.
+- Both `--test <value>` and `--test=<value>` forms are accepted.
+- **Zero matches → exit 1** with a clear message listing the filter values that
+  matched nothing. A typo in CI/debug must not silently "pass" with 0 tests run.
+- When no `--test` flag is given, behavior is unchanged: the full suite runs via
+  `runAll()`.
+
+## Why this is feasible
+
+- The browser exposes the test registry at `window.__TWD_STATE__.handlers` — a
+  `Map` of `{ id, name, parent, type: 'suite' | 'test', children, depth }`. The
+  CLI can read every test/suite name *before* running anything.
+- `window.__testRunner` already has `runByIds(ids)`, which runs only the tests
+  whose ids are in the set **and** correctly executes their parent suites'
+  `beforeEach`/`afterEach` hooks (verified in `twd-js` 1.8.1 bundle).
+- The existing `src/buildTestPath.js` already turns a test id + handler list
+  into the `"Suite > … > test"` string, so the matcher reuses it.
+
+## Architecture / data flow
+
+1. **`bin/twd-cli.js`** — after the `run` command is matched, parse
+   `process.argv` for repeated `--test <value>` / `--test=<value>` into a
+   `testFilters` string array. Call `runTests({ testFilters })`. Update the help
+   text to document `--test`.
+
+2. **`src/filterTests.js`** (new, pure, unit-testable) —
+   `selectTestIds(handlers, filters)`:
+   - For each handler with `type === 'test'`, build its full path via
+     `buildTestPath(handler.id, handlers)`.
+   - Lowercase the path and each filter; the test matches if its path contains
+     any filter as a substring.
+   - Returns `{ ids: string[], unmatchedFilters: string[] }`, where
+     `unmatchedFilters` lists filter values that matched no test (used for the
+     zero-match error and diagnostics).
+
+3. **`src/index.js`** — `runTests(options = {})` accepts `options.testFilters`
+   (default `[]`):
+   - When `testFilters` is non-empty: after `waitForSelector('#twd-sidebar-root')`,
+     run one `page.evaluate` that reads `window.__TWD_STATE__.handlers` and
+     returns `[{ id, name, parent, type }]`. Call `selectTestIds` **in Node**.
+   - If no ids match: log `No tests matched filter(s): "x", "y"`, close the
+     browser, and return `true` (→ the CLI exits 1).
+   - Otherwise pass the selected ids into the run `page.evaluate`, which calls
+     `runner.runByIds(ids)` instead of `runner.runAll()`. Because `runByIds`
+     only fires `onPass`/`onFail` for tests that ran, `testStatus` naturally
+     contains just the filtered tests and the existing report/summary code needs
+     no other change.
+   - **Coverage is skipped whenever a filter is active.** A filtered run is a
+     debug run; writing partial coverage to `.nyc_output` would pollute later
+     full-run reports. This is logged so the skip is not surprising.
+
+## Testing
+
+- **Unit tests for `selectTestIds`** (`tests/filterTests.test.js`): substring
+  matching, case-insensitivity, full-path matching that picks up describe names,
+  leaf test-name matching, multiple filters (OR), and the no-match case
+  populating `unmatchedFilters`.
+- **Extend `tests/runTests.test.js`**: a filtered run calls `runByIds` with the
+  expected ids; a zero-match filtered run returns `true` (exit 1); coverage is
+  skipped when a filter is active.
+
+## Documentation
+
+- Add a "Filtering tests" subsection under Usage in `README.md`: repeatable
+  flag, case-insensitive substring matching, matching against the full
+  `suite > test` path (so describe names work), worked examples, and the
+  zero-match-exits-1 behavior.
+
+## Git / dependencies
+
+- All work on branch `feat/filter-tests`; no commits on `main`.
+- No dependency changes, so `npm run lock:linux` is not required.
+
+## Out of scope
+
+- Glob/regex matching, exact-match mode, or a separate `--describe` flag. The
+  substring-on-full-path mechanism covers the stated debugging use case; these
+  can be added later if a concrete need appears.
+- Filtering by file, tag, or status.

From 43794d6a57142de0100f0cd5197067d2deb176d5 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Tue, 23 Jun 2026 10:47:37 +0200
Subject: [PATCH 2/9] docs: implementation plan for --test filter flag

---
 .../plans/2026-06-23-filter-tests.md          | 610 ++++++++++++++++++
 1 file changed, 610 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-06-23-filter-tests.md

diff --git a/docs/superpowers/plans/2026-06-23-filter-tests.md b/docs/superpowers/plans/2026-06-23-filter-tests.md
new file mode 100644
index 0000000..c1ccbb9
--- /dev/null
+++ b/docs/superpowers/plans/2026-06-23-filter-tests.md
@@ -0,0 +1,610 @@
+# `--test` Filter Flag Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add a repeatable `--test "<value>"` CLI flag that runs only the TWD tests whose full `"Suite > test"` path contains a filter value (case-insensitive), for fast targeted debugging.
+
+**Architecture:** Two new pure, unit-tested helpers (`parseRunArgs` for the CLI, `selectTestIds` for matching) feed into `runTests()`. When filters are present, `runTests` reads the in-browser test registry (`window.__TWD_STATE__.handlers`), matches in Node, and runs the matched ids via the existing `window.__testRunner.runByIds(ids)` instead of `runAll()`. No new dependencies.
+
+**Tech Stack:** Node.js ESM, Puppeteer, twd-js, Vitest.
+
+## Global Constraints
+
+- ESM only (`import`/`export`), Node >= 18.
+- No new dependencies — `package.json` `dependencies` stay `openapi-mock-validator`, `puppeteer`, `twd-js`. (No `npm run lock:linux` needed.)
+- All work on branch `feat/filter-tests`. No commits on `main`.
+- Matching is **case-insensitive substring** against the **full path** `"Suite > Subsuite > test name"`.
+- Multiple `--test` flags are **OR**. Zero total matches → CLI exits 1.
+- Reuse the existing `src/buildTestPath.js` helper for path construction.
+
+---
+
+### Task 1: `selectTestIds` matcher
+
+**Files:**
+- Create: `src/filterTests.js`
+- Test: `tests/filterTests.test.js`
+
+**Interfaces:**
+- Consumes: `buildTestPath(testId, handlers)` from `src/buildTestPath.js` — returns `"Suite > test"` or `null`.
+- Produces: `selectTestIds(handlers, filters)` where `handlers` is `Array<{ id, name, parent, type }>` and `filters` is `string[]`. Returns `{ ids: string[], unmatchedFilters: string[] }`. `ids` are the ids of `type === 'test'` handlers whose lowercased full path contains any lowercased filter as a substring. `unmatchedFilters` are filter values (original casing) that matched no test.
+
+- [ ] **Step 1: Write the failing test**
+
+```js
+// tests/filterTests.test.js
+import { describe, it, expect } from "vitest";
+import { selectTestIds } from "../src/filterTests.js";
+
+const handlers = [
+  { id: 's1', name: 'Login', parent: undefined, type: 'suite' },
+  { id: 't1', name: 'shows error on bad password', parent: 's1', type: 'test' },
+  { id: 't2', name: 'redirects on success', parent: 's1', type: 'test' },
+  { id: 's2', name: 'Signup', parent: undefined, type: 'suite' },
+  { id: 't3', name: 'shows error on taken email', parent: 's2', type: 'test' },
+];
+
+describe("selectTestIds", () => {
+  it("matches a leaf test name by case-insensitive substring", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['REDIRECTS']);
+    expect(ids).toEqual(['t2']);
+    expect(unmatchedFilters).toEqual([]);
+  });
+
+  it("matches all tests under a describe via the full path", () => {
+    const { ids } = selectTestIds(handlers, ['Login']);
+    expect(ids.sort()).toEqual(['t1', 't2']);
+  });
+
+  it("treats multiple filters as OR", () => {
+    const { ids } = selectTestIds(handlers, ['redirects', 'taken email']);
+    expect(ids.sort()).toEqual(['t2', 't3']);
+  });
+
+  it("matches the same substring across suites", () => {
+    const { ids } = selectTestIds(handlers, ['shows error']);
+    expect(ids.sort()).toEqual(['t1', 't3']);
+  });
+
+  it("reports filters that matched nothing", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['Login', 'nope']);
+    expect(ids.sort()).toEqual(['t1', 't2']);
+    expect(unmatchedFilters).toEqual(['nope']);
+  });
+
+  it("returns empty ids when nothing matches", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['zzz']);
+    expect(ids).toEqual([]);
+    expect(unmatchedFilters).toEqual(['zzz']);
+  });
+
+  it("ignores suite handlers as run targets", () => {
+    const { ids } = selectTestIds(handlers, ['Signup']);
+    expect(ids).toEqual(['t3']); // s2 (the suite) is never an id
+  });
+});
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `npx vitest run tests/filterTests.test.js`
+Expected: FAIL — `Failed to resolve import "../src/filterTests.js"` / `selectTestIds is not a function`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+```js
+// src/filterTests.js
+import { buildTestPath } from './buildTestPath.js';
+
+export function selectTestIds(handlers, filters) {
+  const needles = filters.map((f) => f.toLowerCase());
+  const matchedNeedles = new Set();
+  const ids = [];
+
+  for (const handler of handlers) {
+    if (handler.type !== 'test') continue;
+    const path = buildTestPath(handler.id, handlers);
+    if (!path) continue;
+    const haystack = path.toLowerCase();
+
+    let matched = false;
+    for (let i = 0; i < needles.length; i++) {
+      if (haystack.includes(needles[i])) {
+        matched = true;
+        matchedNeedles.add(i);
+      }
+    }
+    if (matched) ids.push(handler.id);
+  }
+
+  const unmatchedFilters = filters.filter((_, i) => !matchedNeedles.has(i));
+  return { ids, unmatchedFilters };
+}
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `npx vitest run tests/filterTests.test.js`
+Expected: PASS (7 passed).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/filterTests.js tests/filterTests.test.js
+git commit -m "feat: add selectTestIds matcher for test filtering"
+```
+
+---
+
+### Task 2: `parseRunArgs` CLI argument parser
+
+**Files:**
+- Create: `src/parseArgs.js`
+- Test: `tests/parseArgs.test.js`
+
+**Interfaces:**
+- Produces: `parseRunArgs(argv)` where `argv` is the array of tokens **after** the `run` command (i.e. `process.argv.slice(3)`). Returns `{ testFilters: string[] }`. Supports both `--test <value>` (value is the next token) and `--test=<value>` forms. Unknown tokens are ignored.
+
+- [ ] **Step 1: Write the failing test**
+
+```js
+// tests/parseArgs.test.js
+import { describe, it, expect } from "vitest";
+import { parseRunArgs } from "../src/parseArgs.js";
+
+describe("parseRunArgs", () => {
+  it("returns empty filters when no args", () => {
+    expect(parseRunArgs([])).toEqual({ testFilters: [] });
+  });
+
+  it("parses a single --test <value>", () => {
+    expect(parseRunArgs(['--test', 'shows error'])).toEqual({
+      testFilters: ['shows error'],
+    });
+  });
+
+  it("parses repeated --test flags in order", () => {
+    expect(parseRunArgs(['--test', 'Login', '--test', 'Signup'])).toEqual({
+      testFilters: ['Login', 'Signup'],
+    });
+  });
+
+  it("parses the --test=<value> form", () => {
+    expect(parseRunArgs(['--test=Login'])).toEqual({
+      testFilters: ['Login'],
+    });
+  });
+
+  it("ignores a trailing --test with no value", () => {
+    expect(parseRunArgs(['--test'])).toEqual({ testFilters: [] });
+  });
+
+  it("ignores unknown tokens", () => {
+    expect(parseRunArgs(['--verbose', '--test', 'Login'])).toEqual({
+      testFilters: ['Login'],
+    });
+  });
+});
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `npx vitest run tests/parseArgs.test.js`
+Expected: FAIL — `Failed to resolve import "../src/parseArgs.js"`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+```js
+// src/parseArgs.js
+export function parseRunArgs(argv) {
+  const testFilters = [];
+
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    if (token === '--test') {
+      const value = argv[i + 1];
+      if (value !== undefined) {
+        testFilters.push(value);
+        i++;
+      }
+    } else if (token.startsWith('--test=')) {
+      testFilters.push(token.slice('--test='.length));
+    }
+  }
+
+  return { testFilters };
+}
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `npx vitest run tests/parseArgs.test.js`
+Expected: PASS (6 passed).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/parseArgs.js tests/parseArgs.test.js
+git commit -m "feat: add parseRunArgs CLI argument parser"
+```
+
+---
+
+### Task 3: Wire filtering into `runTests`
+
+**Files:**
+- Modify: `src/index.js`
+- Test: `tests/runTests.test.js`
+
+**Interfaces:**
+- Consumes: `selectTestIds(handlers, filters)` from `src/filterTests.js` (Task 1).
+- Produces: `runTests(options = {})` now accepts `options.testFilters: string[]` (default `[]`). When non-empty it enumerates the in-browser registry, computes selected ids in Node, and the run `page.evaluate` is called as `evaluate(fn, retryCount, selectedIds)` where `selectedIds` is `string[]` (filtered) or `null` (full run). Zero matches → returns `true` and the run `evaluate` is **not** called. Coverage collection is skipped whenever `selectedIds` is non-null.
+
+- [ ] **Step 1: Write the failing tests**
+
+Add these tests inside the existing `describe("runTests", …)` block in `tests/runTests.test.js`. They rely on the mocks and helpers that file already defines (e.g. `createMockPage`, `createMockBrowser`, `defaultMockConfig`):
+
+```js
+  it("passes selectedIds=null to the run evaluate when no filter", async () => {
+    const testStatus = [{ id: '1', status: 'pass' }];
+    const handlers = [{ id: '1', name: 'test1', type: 'test' }];
+    const page = createMockPage({ handlers, testStatus });
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+
+    await runTests();
+
+    expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 2, null);
+  });
+
+  it("runs only matching tests when a --test filter is given", async () => {
+    const registry = [
+      { id: 's1', name: 'Login', parent: undefined, type: 'suite' },
+      { id: 't1', name: 'shows error', parent: 's1', type: 'test' },
+      { id: 't2', name: 'redirects', parent: 's1', type: 'test' },
+    ];
+    const runResult = {
+      handlers: registry,
+      testStatus: [{ id: 't1', status: 'pass' }],
+    };
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn()
+        .mockResolvedValueOnce(registry)   // enumeration pass
+        .mockResolvedValueOnce(runResult), // run pass
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+
+    const result = await runTests({ testFilters: ['shows error'] });
+
+    expect(result).toBe(false);
+    // second evaluate call is the run; selectedIds is the matched ids
+    expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']);
+  });
+
+  it("returns true and skips the run when a filter matches nothing", async () => {
+    const registry = [
+      { id: 't1', name: 'shows error', parent: undefined, type: 'test' },
+    ];
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn().mockResolvedValueOnce(registry),
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+    const result = await runTests({ testFilters: ['nope'] });
+
+    expect(result).toBe(true);
+    expect(page.evaluate).toHaveBeenCalledTimes(1); // enumeration only, no run
+    expect(browser.close).toHaveBeenCalled();
+    const errors = errorSpy.mock.calls.map((c) => String(c[0]));
+    expect(errors.some((e) => e.includes('No tests matched') && e.includes('nope'))).toBe(true);
+    errorSpy.mockRestore();
+  });
+
+  it("skips coverage collection when a filter is active", async () => {
+    const registry = [
+      { id: 't1', name: 'shows error', parent: undefined, type: 'test' },
+    ];
+    const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] };
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn()
+        .mockResolvedValueOnce(registry)
+        .mockResolvedValueOnce(runResult),
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+    vi.mocked(loadConfig).mockReturnValue({ ...defaultMockConfig, coverage: true });
+
+    await runTests({ testFilters: ['shows error'] });
+
+    // only the 2 evaluate calls happened (enumeration + run); coverage would be a 3rd
+    expect(page.evaluate).toHaveBeenCalledTimes(2);
+    expect(fs.writeFileSync).not.toHaveBeenCalled();
+  });
+```
+
+Also update the existing assertion in `should pass retryCount to page.evaluate`:
+
+```js
+    // page.evaluate is called with (fn, retryCount, selectedIds)
+    expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3, null);
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `npx vitest run tests/runTests.test.js`
+Expected: FAIL — new filter tests fail (no enumeration pass, `selectedIds` arg missing); the updated retryCount assertion fails because the current code calls `evaluate(fn, 3)` without the third arg.
+
+- [ ] **Step 3: Add the imports and `testFilters` parameter**
+
+In `src/index.js`, add the import near the other `./` imports (after line 10):
+
+```js
+import { selectTestIds } from './filterTests.js';
+```
+
+Change the function signature (line 20) from:
+
+```js
+export async function runTests() {
+```
+
+to:
+
+```js
+export async function runTests(options = {}) {
+  const { testFilters = [] } = options;
+```
+
+- [ ] **Step 4: Add the enumeration + matching block before the run `page.evaluate`**
+
+In `src/index.js`, immediately after the `console.log('Page loaded. Starting tests...');` line (currently line 64) and before the `const { handlers, testStatus } = await page.evaluate(` block, insert:
+
+```js
+    // Resolve --test filters to a concrete set of test ids (null = run all)
+    let selectedIds = null;
+    if (testFilters.length > 0) {
+      const registeredHandlers = await page.evaluate(() => {
+        const state = window.__TWD_STATE__;
+        if (!state || !state.handlers) return [];
+        return Array.from(state.handlers.values()).map((h) => ({
+          id: h.id,
+          name: h.name,
+          parent: h.parent,
+          type: h.type,
+        }));
+      });
+
+      const { ids, unmatchedFilters } = selectTestIds(registeredHandlers, testFilters);
+
+      if (ids.length === 0) {
+        console.error(
+          `No tests matched filter(s): ${testFilters.map((f) => `"${f}"`).join(', ')}`
+        );
+        await browser.close();
+        return true;
+      }
+
+      if (unmatchedFilters.length > 0) {
+        console.warn(
+          `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}`
+        );
+      }
+
+      selectedIds = ids;
+      console.log(`Filtering: running ${ids.length} test(s) matching --test filter(s).`);
+    }
+```
+
+- [ ] **Step 5: Make the run `page.evaluate` honor `selectedIds`**
+
+In `src/index.js`, change the run evaluate's function signature from:
+
+```js
+    const { handlers, testStatus } = await page.evaluate(async (retryCount) => {
+```
+
+to:
+
+```js
+    const { handlers, testStatus } = await page.evaluate(async (retryCount, selectedIds) => {
+```
+
+Change the run call from:
+
+```js
+      const handlers = await runner.runAll();
+```
+
+to:
+
+```js
+      const handlers = selectedIds
+        ? await runner.runByIds(selectedIds)
+        : await runner.runAll();
+```
+
+And change the trailing args of that `page.evaluate(...)` call from:
+
+```js
+    }, config.retryCount);
+```
+
+to:
+
+```js
+    }, config.retryCount, selectedIds);
+```
+
+- [ ] **Step 6: Skip coverage when filtering**
+
+In `src/index.js`, find the coverage block:
+
+```js
+    // Handle code coverage if enabled
+    if (config.coverage && !hasFailures) {
+```
+
+Replace those two lines with this (adds a skip-log for filtered runs, then adds `&& !selectedIds` to the guard):
+
+```js
+    // Handle code coverage if enabled (skipped when a --test filter is active)
+    if (selectedIds && config.coverage) {
+      console.log('Skipping coverage collection (test filter active).');
+    }
+    if (config.coverage && !hasFailures && !selectedIds) {
+```
+
+- [ ] **Step 7: Run tests to verify they pass**
+
+Run: `npx vitest run tests/runTests.test.js`
+Expected: PASS (all existing + 4 new filter tests).
+
+- [ ] **Step 8: Run the full suite**
+
+Run: `npm run test:ci`
+Expected: PASS — all test files green.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add src/index.js tests/runTests.test.js
+git commit -m "feat: filter tests via --test in runTests"
+```
+
+---
+
+### Task 4: Wire the `--test` flag into the CLI entry point
+
+**Files:**
+- Modify: `bin/twd-cli.js`
+
+**Interfaces:**
+- Consumes: `parseRunArgs(argv)` from `src/parseArgs.js` (Task 2) and `runTests(options)` from `src/index.js` (Task 3).
+
+- [ ] **Step 1: Update `bin/twd-cli.js`**
+
+Replace the entire contents of `bin/twd-cli.js` with:
+
+```js
+#!/usr/bin/env node
+
+import { runTests } from '../src/index.js';
+import { parseRunArgs } from '../src/parseArgs.js';
+
+const command = process.argv[2];
+
+if (command === 'run') {
+  try {
+    const { testFilters } = parseRunArgs(process.argv.slice(3));
+    const hasFailures = await runTests({ testFilters });
+    process.exit(hasFailures ? 1 : 0);
+  } catch (error) {
+    process.exit(1);
+  }
+} else {
+  console.log(`
+twd-cli - Test runner for TWD tests
+
+Usage:
+  npx twd-cli run                  Run all tests
+  npx twd-cli run --test "<name>"  Run only tests whose "suite > test" path
+                                   contains <name> (case-insensitive).
+                                   Repeatable; multiple --test values are OR'd.
+
+Examples:
+  npx twd-cli run --test "shows error"
+  npx twd-cli run --test "Login" --test "Signup"
+
+Options:
+  Create a twd.config.json file in your project root to customize settings.
+  `);
+  process.exit(command ? 1 : 0);
+}
+```
+
+- [ ] **Step 2: Verify the help text manually**
+
+Run: `node ./bin/twd-cli.js`
+Expected: help text prints and includes the `--test "<name>"` usage and the two examples; process exits 0 (no command given).
+
+- [ ] **Step 3: Verify arg parsing reaches runTests (no dev server needed)**
+
+Run: `node ./bin/twd-cli.js run --test "definitely-not-a-real-test-xyz"`
+Expected: the runner starts, fails to connect to the dev server (Puppeteer navigation/selector error), and the process exits 1. This confirms the `run` path executes with filters wired in. (A full green-path check needs a running dev server; no task in this plan covers it, so run it manually against a real app or rely on existing CI usage.)
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add bin/twd-cli.js
+git commit -m "feat: parse --test flag in CLI entry point"
+```
+
+---
+
+### Task 5: Document `--test` in the README
+
+**Files:**
+- Modify: `README.md`
+
+- [ ] **Step 1: Add a "Filtering tests" subsection**
+
+In `README.md`, directly after the `### Basic Usage` section (before `### Configuration`), insert:
+
+````markdown
+### Filtering tests
+
+Run only a subset of tests with the repeatable `--test` flag. Matching is
+**case-insensitive** and matches a **substring** of each test's full
+`"Suite > test name"` path:
+
+```bash
+# Run every test whose name contains "shows error"
+npx twd-cli run --test "shows error"
+
+# Because matching uses the full "suite > test" path, passing a describe
+# name runs every test inside that describe block:
+npx twd-cli run --test "Login"
+
+# Multiple --test flags are combined with OR (a test runs if it matches any):
+npx twd-cli run --test "Login" --test "Signup"
+```
+
+Notes:
+
+- If no test matches any filter, the run exits with code `1` and prints
+  `No tests matched filter(s): …` — so a typo won't silently look like a pass.
+- Code coverage collection is skipped while a `--test` filter is active, since a
+  filtered run is a partial (debug) run.
+````
+
+- [ ] **Step 2: Verify the README section was added**
+
+Run: `grep -n "Filtering tests" README.md`
+Expected: prints the new heading line.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add README.md
+git commit -m "docs: document --test filter flag"
+```
+
+---
+
+## Self-Review Notes
+
+- **Spec coverage:** repeatable `--test` (Task 2, 4) ✓; case-insensitive substring on full path incl. describe (Task 1) ✓; OR semantics (Task 1) ✓; zero-match exits 1 (Task 3) ✓; `runByIds` selective run (Task 3) ✓; coverage skipped under filter (Task 3) ✓; `buildTestPath` reuse (Task 1) ✓; README "Filtering tests" (Task 5) ✓; no deps / branch discipline (Global Constraints) ✓.
+- **Type consistency:** `selectTestIds(handlers, filters) -> { ids, unmatchedFilters }` used identically in Task 1 and Task 3; `parseRunArgs(argv) -> { testFilters }` used identically in Task 2 and Task 4; `runTests({ testFilters })` consistent across Tasks 3–4; the run `page.evaluate(fn, retryCount, selectedIds)` shape is consistent across the Task 3 steps and tests.
+- **Placeholders:** none — every code/test step is complete.

From c4a1b4660ae152380303fa0f9e7a2c6527e16e70 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:41:13 +0200
Subject: [PATCH 3/9] feat: add selectTestIds matcher for test filtering

---
 src/filterTests.js        | 26 ++++++++++++++++++++
 tests/filterTests.test.js | 50 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 src/filterTests.js
 create mode 100644 tests/filterTests.test.js

diff --git a/src/filterTests.js b/src/filterTests.js
new file mode 100644
index 0000000..d423182
--- /dev/null
+++ b/src/filterTests.js
@@ -0,0 +1,26 @@
+import { buildTestPath } from './buildTestPath.js';
+
+export function selectTestIds(handlers, filters) { // returns { ids, unmatchedFilters } for the 'test' handlers matched by filters
+  const needles = filters.map((f) => f.toLowerCase()); // lowercase once so matching is case-insensitive
+  const matchedNeedles = new Set(); // indices into filters that matched at least one test
+  const ids = []; // ids of matching tests, in handler order
+
+  for (const handler of handlers) {
+    if (handler.type !== 'test') continue; // only tests are run targets; suites are skipped
+    const path = buildTestPath(handler.id, handlers);
+    if (!path) continue; // no usable path from buildTestPath, so nothing to match against
+    const haystack = path.toLowerCase();
+
+    let matched = false;
+    for (let i = 0; i < needles.length; i++) { // no early exit: every filter that hits must be recorded
+      if (haystack.includes(needles[i])) { // NOTE: an empty filter ('') is contained in every path, so it matches all tests
+        matched = true;
+        matchedNeedles.add(i);
+      }
+    }
+    if (matched) ids.push(handler.id); // pushed once per test even when several filters match
+  }
+
+  const unmatchedFilters = filters.filter((_, i) => !matchedNeedles.has(i)); // original casing and order preserved
+  return { ids, unmatchedFilters };
+}
diff --git a/tests/filterTests.test.js b/tests/filterTests.test.js
new file mode 100644
index 0000000..bfc2c11
--- /dev/null
+++ b/tests/filterTests.test.js
@@ -0,0 +1,50 @@
+import { describe, it, expect } from "vitest";
+import { selectTestIds } from "../src/filterTests.js";
+
+const handlers = [ // fixture registry: two top-level suites; each test points at its suite via parent
+  { id: 's1', name: 'Login', parent: undefined, type: 'suite' },
+  { id: 't1', name: 'shows error on bad password', parent: 's1', type: 'test' },
+  { id: 't2', name: 'redirects on success', parent: 's1', type: 'test' },
+  { id: 's2', name: 'Signup', parent: undefined, type: 'suite' },
+  { id: 't3', name: 'shows error on taken email', parent: 's2', type: 'test' },
+];
+
+describe("selectTestIds", () => {
+  it("matches a leaf test name by case-insensitive substring", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['REDIRECTS']); // upper-case filter vs lower-case test name
+    expect(ids).toEqual(['t2']);
+    expect(unmatchedFilters).toEqual([]);
+  });
+
+  it("matches all tests under a describe via the full path", () => {
+    const { ids } = selectTestIds(handlers, ['Login']);
+    expect(ids.sort()).toEqual(['t1', 't2']); // sorted so the assertion does not depend on result order
+  });
+
+  it("treats multiple filters as OR", () => {
+    const { ids } = selectTestIds(handlers, ['redirects', 'taken email']); // each filter hits a different suite's test
+    expect(ids.sort()).toEqual(['t2', 't3']);
+  });
+
+  it("matches the same substring across suites", () => {
+    const { ids } = selectTestIds(handlers, ['shows error']);
+    expect(ids.sort()).toEqual(['t1', 't3']);
+  });
+
+  it("reports filters that matched nothing", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['Login', 'nope']); // 'Login' matches, 'nope' does not
+    expect(ids.sort()).toEqual(['t1', 't2']);
+    expect(unmatchedFilters).toEqual(['nope']);
+  });
+
+  it("returns empty ids when nothing matches", () => {
+    const { ids, unmatchedFilters } = selectTestIds(handlers, ['zzz']);
+    expect(ids).toEqual([]);
+    expect(unmatchedFilters).toEqual(['zzz']);
+  });
+
+  it("ignores suite handlers as run targets", () => {
+    const { ids } = selectTestIds(handlers, ['Signup']);
+    expect(ids).toEqual(['t3']); // s2 (the suite) is never an id
+  });
+});

From 0c2d0851b1543db67a328ca2772f334c6d92a34f Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:42:28 +0200
Subject: [PATCH 4/9] feat: add parseRunArgs CLI argument parser

---
 src/parseArgs.js        | 18 ++++++++++++++++++
 tests/parseArgs.test.js | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 src/parseArgs.js
 create mode 100644 tests/parseArgs.test.js

diff --git a/src/parseArgs.js b/src/parseArgs.js
new file mode 100644
index 0000000..8b73f91
--- /dev/null
+++ b/src/parseArgs.js
@@ -0,0 +1,18 @@
+export function parseRunArgs(argv) { // collects the value of every --test flag in argv, in order, into { testFilters }
+  const testFilters = [];
+
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    if (token === '--test') { // space-separated form: the value is the following token
+      const value = argv[i + 1]; // NOTE: taken verbatim, even if it is itself a flag (e.g. another --test)
+      if (value !== undefined) { // a trailing --test with nothing after it is ignored
+        testFilters.push(value);
+        i++; // skip the token just consumed as the value
+      }
+    } else if (token.startsWith('--test=')) { // inline form; any other token falls through and is ignored
+      testFilters.push(token.slice('--test='.length)); // everything after '=' is the value; '--test=' yields ''
+    }
+  }
+
+  return { testFilters };
+}
diff --git a/tests/parseArgs.test.js b/tests/parseArgs.test.js
new file mode 100644
index 0000000..6c9a33f
--- /dev/null
+++ b/tests/parseArgs.test.js
@@ -0,0 +1,36 @@
+import { describe, it, expect } from "vitest";
+import { parseRunArgs } from "../src/parseArgs.js";
+
+// Spec for parseRunArgs: maps CLI argv shapes to the filter list they produce.
+describe("parseRunArgs", () => {
+  // Asserts the complete parse result, so any unexpected extra key fails too.
+  const expectFilters = (argv, testFilters) => {
+    expect(parseRunArgs(argv)).toEqual({ testFilters });
+  };
+
+  it("returns empty filters when no args", () => {
+    expectFilters([], []);
+  });
+
+  it("parses a single --test <value>", () => {
+    expectFilters(['--test', 'shows error'], ['shows error']);
+  });
+
+  it("parses repeated --test flags in order", () => {
+    expectFilters(['--test', 'Login', '--test', 'Signup'], ['Login', 'Signup']);
+  });
+
+  it("parses the --test=<value> form", () => {
+    expectFilters(['--test=Login'], ['Login']);
+  });
+
+  it("ignores a trailing --test with no value", () => {
+    // A bare flag with nothing after it contributes no filter.
+    expectFilters(['--test'], []);
+  });
+
+  it("ignores unknown tokens", () => {
+    // '--verbose' is not a recognised flag; only the --test pair is picked up.
+    expectFilters(['--verbose', '--test', 'Login'], ['Login']);
+  });
+});

From bfc0943d5836174c7cfc8b3e31022b428ae78a43 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:45:40 +0200
Subject: [PATCH 5/9] feat: filter tests via --test in runTests

---
 src/index.js           | 53 +++++++++++++++++++++---
 tests/runTests.test.js | 92 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 8 deletions(-)

diff --git a/src/index.js b/src/index.js
index dc2c5ae..81c69a9 100644
--- a/src/index.js
+++ b/src/index.js
@@ -8,6 +8,7 @@ import { printContractReport } from './contractReport.js';
 import { generateContractMarkdown } from './contractMarkdown.js';
 import { buildTestPath } from './buildTestPath.js';
 import { formatTestSummary, formatFailedTestsBlock } from './testSummary.js';
+import { selectTestIds } from './filterTests.js';
 
 function isProtocolTimeout(error) {
   const message = error && error.message ? error.message : '';
@@ -17,7 +18,8 @@ function isProtocolTimeout(error) {
   );
 }
 
-export async function runTests() {
+export async function runTests(options = {}) {
+  const { testFilters = [] } = options;
   let browser;
   try {
     const config = loadConfig();
@@ -63,8 +65,42 @@ export async function runTests() {
     await page.waitForSelector('#twd-sidebar-root', { timeout: config.timeout });
     console.log('Page loaded. Starting tests...');
 
+    // Resolve --test filters to a concrete set of test ids (null = run all)
+    let selectedIds = null;
+    if (testFilters.length > 0) {
+      const registeredHandlers = await page.evaluate(() => {
+        const state = window.__TWD_STATE__;
+        if (!state || !state.handlers) return [];
+        return Array.from(state.handlers.values()).map((h) => ({
+          id: h.id,
+          name: h.name,
+          parent: h.parent,
+          type: h.type,
+        }));
+      });
+
+      const { ids, unmatchedFilters } = selectTestIds(registeredHandlers, testFilters);
+
+      if (ids.length === 0) {
+        console.error(
+          `No tests matched filter(s): ${testFilters.map((f) => `"${f}"`).join(', ')}`
+        );
+        await browser.close();
+        return true;
+      }
+
+      if (unmatchedFilters.length > 0) {
+        console.warn(
+          `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}`
+        );
+      }
+
+      selectedIds = ids;
+      console.log(`Filtering: running ${ids.length} test(s) matching --test filter(s).`);
+    }
+
     // Execute all tests
-    const { handlers, testStatus } = await page.evaluate(async (retryCount) => {
+    const { handlers, testStatus } = await page.evaluate(async (retryCount, selectedIds) => {
       const TestRunner = window.__testRunner;
       const testStatus = [];
       const runner = new TestRunner({
@@ -86,9 +122,11 @@ export async function runTests() {
           testStatus.push({ id: test.id, status: "skip" });
         },
       }, { retryCount });
-      const handlers = await runner.runAll();
+      const handlers = selectedIds
+        ? await runner.runByIds(selectedIds)
+        : await runner.runAll();
       return { handlers: Array.from(handlers.values()), testStatus };
-    }, config.retryCount);
+    }, config.retryCount, selectedIds);
 
     const durationMs = Date.now() - startedAt;
 
@@ -143,8 +181,11 @@ export async function runTests() {
       }
     }
 
-    // Handle code coverage if enabled
-    if (config.coverage && !hasFailures) {
+    // Handle code coverage if enabled (skipped when a --test filter is active)
+    if (selectedIds && config.coverage) {
+      console.log('Skipping coverage collection (test filter active).');
+    }
+    if (config.coverage && !hasFailures && !selectedIds) {
       const coverage = await page.evaluate(() => window.__coverage__);
       if (coverage) {
         console.log('Collecting code coverage data...');
diff --git a/tests/runTests.test.js b/tests/runTests.test.js
index 960fc64..d88cd5c 100644
--- a/tests/runTests.test.js
+++ b/tests/runTests.test.js
@@ -77,8 +77,8 @@ describe("runTests", () => {
 
     await runTests();
 
-    // page.evaluate is called with (fn, retryCount)
-    expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3);
+    // page.evaluate is called with (fn, retryCount, selectedIds)
+    expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3, null);
   });
 
   it("should pass protocolTimeout to puppeteer.launch", async () => {
@@ -278,6 +278,94 @@ describe("runTests", () => {
     expect(entries[0].occurrence).toBe(1);
   });
 
+  it("passes selectedIds=null to the run evaluate when no filter", async () => { // no options given: the run evaluate must receive null as its last argument
+    const testStatus = [{ id: '1', status: 'pass' }];
+    const handlers = [{ id: '1', name: 'test1', type: 'test' }];
+    const page = createMockPage({ handlers, testStatus });
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+
+    await runTests();
+
+    expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 2, null); // args are (fn, retryCount, selectedIds); 2 is presumably the mocked config's retryCount — confirm
+  });
+
+  it("runs only matching tests when a --test filter is given", async () => { // filter 'shows error' should narrow the run to t1 only
+    const registry = [
+      { id: 's1', name: 'Login', parent: undefined, type: 'suite' },
+      { id: 't1', name: 'shows error', parent: 's1', type: 'test' },
+      { id: 't2', name: 'redirects', parent: 's1', type: 'test' },
+    ];
+    const runResult = {
+      handlers: registry,
+      testStatus: [{ id: 't1', status: 'pass' }],
+    };
+    const page = { // hand-rolled page mock so each evaluate call can resolve to a different value
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn()
+        .mockResolvedValueOnce(registry)   // enumeration pass
+        .mockResolvedValueOnce(runResult), // run pass
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+
+    const result = await runTests({ testFilters: ['shows error'] });
+
+    expect(result).toBe(false); // the CLI treats runTests' return value as hasFailures
+    // second evaluate call is the run; selectedIds is the matched ids
+    expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']);
+  });
+
+  it("returns true and skips the run when a filter matches nothing", async () => { // zero matches: report the error, close the browser, never start the run
+    const registry = [
+      { id: 't1', name: 'shows error', parent: undefined, type: 'test' },
+    ];
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn().mockResolvedValueOnce(registry), // only the enumeration call is given a resolved value
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); // silence console.error and capture its calls
+
+    const result = await runTests({ testFilters: ['nope'] });
+
+    expect(result).toBe(true); // the CLI maps a true return to exit code 1
+    expect(page.evaluate).toHaveBeenCalledTimes(1); // enumeration only, no run
+    expect(browser.close).toHaveBeenCalled();
+    const errors = errorSpy.mock.calls.map((c) => String(c[0])); // first argument of each console.error call, as text
+    expect(errors.some((e) => e.includes('No tests matched') && e.includes('nope'))).toBe(true);
+    errorSpy.mockRestore();
+  });
+
+  it("skips coverage collection when a filter is active", async () => { // coverage is enabled in config, yet a filtered run must not collect it
+    const registry = [
+      { id: 't1', name: 'shows error', parent: undefined, type: 'test' },
+    ];
+    const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] };
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn()
+        .mockResolvedValueOnce(registry) // first call: registry enumeration
+        .mockResolvedValueOnce(runResult), // second call: the filtered run
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+    vi.mocked(loadConfig).mockReturnValue({ ...defaultMockConfig, coverage: true }); // turn coverage on for this run
+
+    await runTests({ testFilters: ['shows error'] });
+
+    // only the 2 evaluate calls happened (enumeration + run); coverage would be a 3rd
+    expect(page.evaluate).toHaveBeenCalledTimes(2);
+    expect(fs.writeFileSync).not.toHaveBeenCalled(); // no file write took place during the filtered run
+  });
+
   it("should print the Tests: summary line and Failed tests block", async () => {
     const testStatus = [
       { id: '1', status: 'pass' },

From f5b191858d6325b8f55371fe5b28c3425e938902 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:48:57 +0200
Subject: [PATCH 6/9] feat: parse --test flag in CLI entry point

---
 bin/twd-cli.js | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/bin/twd-cli.js b/bin/twd-cli.js
index 2c2b6c7..914fbd8 100755
--- a/bin/twd-cli.js
+++ b/bin/twd-cli.js
@@ -1,12 +1,14 @@
 #!/usr/bin/env node
 
 import { runTests } from '../src/index.js';
+import { parseRunArgs } from '../src/parseArgs.js';
 
 const command = process.argv[2];
 
 if (command === 'run') {
   try {
-    const hasFailures = await runTests();
+    const { testFilters } = parseRunArgs(process.argv.slice(3));
+    const hasFailures = await runTests({ testFilters });
     process.exit(hasFailures ? 1 : 0);
   } catch (error) {
     process.exit(1);
@@ -16,7 +18,14 @@ if (command === 'run') {
 twd-cli - Test runner for TWD tests
 
 Usage:
-  npx twd-cli run    Run all tests
+  npx twd-cli run                  Run all tests
+  npx twd-cli run --test "<name>"  Run only tests whose "suite > test" path
+                                   contains <name> (case-insensitive).
+                                   Repeatable; multiple --test values are OR'd.
+
+Examples:
+  npx twd-cli run --test "shows error"
+  npx twd-cli run --test "Login" --test "Signup"
 
 Options:
   Create a twd.config.json file in your project root to customize settings.

From 12322c8c93f5ca917fce54268053ec0a2fc04b25 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:50:16 +0200
Subject: [PATCH 7/9] docs: document --test filter flag

---
 README.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/README.md b/README.md
index 181ca20..5fee22e 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,31 @@ Run tests with default configuration:
 npx twd-cli run
 ```
 
+### Filtering tests
+
+Run only a subset of tests with the repeatable `--test` flag. Each value is
+compared **case-insensitively** as a **substring** of each test's full
+`"Suite > test name"` path:
+
+```bash
+# Run every test whose name contains "shows error"
+npx twd-cli run --test "shows error"
+
+# Because matching uses the full "suite > test" path, passing a describe
+# name runs every test inside that describe block:
+npx twd-cli run --test "Login"
+
+# Multiple --test flags are combined with OR (a test runs if it matches any):
+npx twd-cli run --test "Login" --test "Signup"
+```
+
+Notes:
+
+- If no test matches any filter, the run exits with code `1` and prints
+  `No tests matched filter(s): …` — so a typo won't silently look like a pass.
+- Code coverage collection is skipped while a `--test` filter is active, since a
+  filtered run is a partial (debug) run.
+
 ### Configuration
 
 Create a `twd.config.json` file in your project root:

From 966967b56a8d5bbb693d4ec78fac6f9a79f0c40d Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Wed, 24 Jun 2026 22:53:05 +0200
Subject: [PATCH 8/9] refactor: clarify partial-match filter warning and cover
 it with a test

---
 src/index.js           |  2 +-
 tests/runTests.test.js | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/index.js b/src/index.js
index 81c69a9..da40a61 100644
--- a/src/index.js
+++ b/src/index.js
@@ -91,7 +91,7 @@ export async function runTests(options = {}) {
 
       if (unmatchedFilters.length > 0) {
         console.warn(
-          `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}`
+          `Warning: these filter(s) matched no tests (others did): ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}`
         );
       }
 
diff --git a/tests/runTests.test.js b/tests/runTests.test.js
index d88cd5c..d7f9c59 100644
--- a/tests/runTests.test.js
+++ b/tests/runTests.test.js
@@ -342,6 +342,34 @@ describe("runTests", () => {
     errorSpy.mockRestore();
   });
 
+  it("warns about filters that matched nothing on a partial match", async () => { // 'Login' selects t1 while 'nope' selects nothing: the run proceeds with a warning
+    const registry = [
+      { id: 's1', name: 'Login', parent: undefined, type: 'suite' },
+      { id: 't1', name: 'shows error', parent: 's1', type: 'test' },
+    ];
+    const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] };
+    const page = {
+      goto: vi.fn(),
+      waitForSelector: vi.fn(),
+      exposeFunction: vi.fn(),
+      evaluate: vi.fn()
+        .mockResolvedValueOnce(registry) // first call: registry enumeration
+        .mockResolvedValueOnce(runResult), // second call: the filtered run
+    };
+    const browser = createMockBrowser(page);
+    vi.mocked(puppeteer.launch).mockResolvedValue(browser);
+    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); // silence console.warn and capture its calls
+
+    const result = await runTests({ testFilters: ['Login', 'nope'] });
+
+    expect(result).toBe(false); // the CLI treats runTests' return value as hasFailures
+    expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']); // the run still receives the ids matched by 'Login'
+    const warnings = warnSpy.mock.calls.map((c) => String(c[0])); // first argument of each console.warn call, as text
+    expect(warnings.some((w) => w.includes('matched no tests') && w.includes('nope'))).toBe(true);
+    expect(warnings.some((w) => w.includes('"Login"'))).toBe(false); // a filter that did match must not be listed in the warning
+    warnSpy.mockRestore();
+  });
+
   it("skips coverage collection when a filter is active", async () => {
     const registry = [
       { id: 't1', name: 'shows error', parent: undefined, type: 'test' },

From 477e3d6617dec72fb686c1b66224aa6c59565cf4 Mon Sep 17 00:00:00 2001
From: kevinccbsg <kevinccbsg@gmail.com>
Date: Thu, 25 Jun 2026 23:19:35 +0200
Subject: [PATCH 9/9] chore: re-trigger PR checks