From 56ee7c2821507f08db08a3938144e91d37675d1c Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Tue, 23 Jun 2026 10:43:06 +0200 Subject: [PATCH 1/9] docs: design spec for --test filter flag --- .../specs/2026-06-23-filter-tests-design.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-23-filter-tests-design.md diff --git a/docs/superpowers/specs/2026-06-23-filter-tests-design.md b/docs/superpowers/specs/2026-06-23-filter-tests-design.md new file mode 100644 index 0000000..0b3abaa --- /dev/null +++ b/docs/superpowers/specs/2026-06-23-filter-tests-design.md @@ -0,0 +1,100 @@ +# Design: `--test` filter for twd-cli + +**Date:** 2026-06-23 +**Branch:** `feat/filter-tests` +**Status:** Approved + +## Goal + +Add a `--test` CLI flag to `twd-cli` that runs only a subset of registered TWD +tests, matched by name. This enables quick, targeted execution from the CLI for +easier debugging instead of always running the full suite. + +## Behavior + +- `--test "<value>"` is **repeatable**. Passing multiple flags means **OR**: a + test runs if it matches *any* of the values. +- Matching is **case-insensitive substring** against the test's **full path** + string, `"Suite > Subsuite > test name"`. + - `--test "shows error"` runs the test whose name contains "shows error". + - `--test "Login"` runs every test under `describe("Login", …)`, because + "Login" appears in those tests' full paths. Describe/suite filtering comes + for free from this one mechanism — there is no separate flag. +- Both `--test <value>` and `--test=<value>` forms are accepted. +- **Zero matches → exit 1** with a clear message listing the filter values that + matched nothing. A typo in CI/debug must not silently "pass" with 0 tests run. +- When no `--test` flag is given, behavior is unchanged: the full suite runs via + `runAll()`.
+ +## Why this is feasible + +- The browser exposes the test registry at `window.__TWD_STATE__.handlers` — a + `Map` of `{ id, name, parent, type: 'suite' | 'test', children, depth }`. The + CLI can read every test/suite name *before* running anything. +- `window.__testRunner` already has `runByIds(ids)`, which runs only the tests + whose ids are in the set **and** correctly executes their parent suites' + `beforeEach`/`afterEach` hooks (verified in `twd-js` 1.8.1 bundle). +- The existing `src/buildTestPath.js` already turns a test id + handler list + into the `"Suite > … > test"` string, so the matcher reuses it. + +## Architecture / data flow + +1. **`bin/twd-cli.js`** — after the `run` command is matched, parse + `process.argv` for repeated `--test <value>` / `--test=<value>` into a + `testFilters` string array. Call `runTests({ testFilters })`. Update the help + text to document `--test`. + +2. **`src/filterTests.js`** (new, pure, unit-testable) — + `selectTestIds(handlers, filters)`: + - For each handler with `type === 'test'`, build its full path via + `buildTestPath(handler.id, handlers)`. + - Lowercase the path and each filter; the test matches if its path contains + any filter as a substring. + - Returns `{ ids: string[], unmatchedFilters: string[] }`, where + `unmatchedFilters` lists filter values that matched no test (used for the + zero-match error and diagnostics). + +3. **`src/index.js`** — `runTests(options = {})` accepts `options.testFilters` + (default `[]`): + - When `testFilters` is non-empty: after `waitForSelector('#twd-sidebar-root')`, + run one `page.evaluate` that reads `window.__TWD_STATE__.handlers` and + returns `[{ id, name, parent, type }]`. Call `selectTestIds` **in Node**. + - If no ids match: log `No tests matched filter(s): "x", "y"`, close the + browser, and return `true` (→ the CLI exits 1). + - Otherwise pass the selected ids into the run `page.evaluate`, which calls + `runner.runByIds(ids)` instead of `runner.runAll()`.
Because `runByIds` + only fires `onPass`/`onFail` for tests that ran, `testStatus` naturally + contains just the filtered tests and the existing report/summary code needs + no other change. + - **Coverage is skipped whenever a filter is active.** A filtered run is a + debug run; writing partial coverage to `.nyc_output` would pollute later + full-run reports. This is logged so the skip is not surprising. + +## Testing + +- **Unit tests for `selectTestIds`** (`tests/filterTests.test.js`): substring + matching, case-insensitivity, full-path matching that picks up describe names, + leaf test-name matching, multiple filters (OR), and the no-match case + populating `unmatchedFilters`. +- **Extend `tests/runTests.test.js`**: a filtered run calls `runByIds` with the + expected ids; a zero-match filtered run returns `true` (exit 1); coverage is + skipped when a filter is active. + +## Documentation + +- Add a "Filtering tests" subsection under Usage in `README.md`: repeatable + flag, case-insensitive substring matching, matching against the full + `suite > test` path (so describe names work), worked examples, and the + zero-match-exits-1 behavior. + +## Git / dependencies + +- All work on branch `feat/filter-tests`; no commits on `main`. +- No dependency changes, so `npm run lock:linux` is not required. + +## Out of scope + +- Glob/regex matching, exact-match mode, or a separate `--describe` flag. The + substring-on-full-path mechanism covers the stated debugging use case; these + can be added later if a concrete need appears. +- Filtering by file, tag, or status. 
From 43794d6a57142de0100f0cd5197067d2deb176d5 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Tue, 23 Jun 2026 10:47:37 +0200 Subject: [PATCH 2/9] docs: implementation plan for --test filter flag --- .../plans/2026-06-23-filter-tests.md | 610 ++++++++++++++++++ 1 file changed, 610 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-23-filter-tests.md diff --git a/docs/superpowers/plans/2026-06-23-filter-tests.md b/docs/superpowers/plans/2026-06-23-filter-tests.md new file mode 100644 index 0000000..c1ccbb9 --- /dev/null +++ b/docs/superpowers/plans/2026-06-23-filter-tests.md @@ -0,0 +1,610 @@ +# `--test` Filter Flag Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a repeatable `--test "<value>"` CLI flag that runs only the TWD tests whose full `"Suite > test"` path contains a filter value (case-insensitive), for fast targeted debugging. + +**Architecture:** Two new pure, unit-tested helpers (`parseRunArgs` for the CLI, `selectTestIds` for matching) feed into `runTests()`. When filters are present, `runTests` reads the in-browser test registry (`window.__TWD_STATE__.handlers`), matches in Node, and runs the matched ids via the existing `window.__testRunner.runByIds(ids)` instead of `runAll()`. No new dependencies. + +**Tech Stack:** Node.js ESM, Puppeteer, twd-js, Vitest. + +## Global Constraints + +- ESM only (`import`/`export`), Node >= 18. +- No new dependencies — `package.json` `dependencies` stay `openapi-mock-validator`, `puppeteer`, `twd-js`. (No `npm run lock:linux` needed.) +- All work on branch `feat/filter-tests`. No commits on `main`. +- Matching is **case-insensitive substring** against the **full path** `"Suite > Subsuite > test name"`. +- Multiple `--test` flags are **OR**. Zero total matches → CLI exits 1.
+- Reuse the existing `src/buildTestPath.js` helper for path construction. + +--- + +### Task 1: `selectTestIds` matcher + +**Files:** +- Create: `src/filterTests.js` +- Test: `tests/filterTests.test.js` + +**Interfaces:** +- Consumes: `buildTestPath(testId, handlers)` from `src/buildTestPath.js` — returns `"Suite > test"` or `null`. +- Produces: `selectTestIds(handlers, filters)` where `handlers` is `Array<{ id, name, parent, type }>` and `filters` is `string[]`. Returns `{ ids: string[], unmatchedFilters: string[] }`. `ids` are the ids of `type === 'test'` handlers whose lowercased full path contains any lowercased filter as a substring. `unmatchedFilters` are filter values (original casing) that matched no test. + +- [ ] **Step 1: Write the failing test** + +```js +// tests/filterTests.test.js +import { describe, it, expect } from "vitest"; +import { selectTestIds } from "../src/filterTests.js"; + +const handlers = [ + { id: 's1', name: 'Login', parent: undefined, type: 'suite' }, + { id: 't1', name: 'shows error on bad password', parent: 's1', type: 'test' }, + { id: 't2', name: 'redirects on success', parent: 's1', type: 'test' }, + { id: 's2', name: 'Signup', parent: undefined, type: 'suite' }, + { id: 't3', name: 'shows error on taken email', parent: 's2', type: 'test' }, +]; + +describe("selectTestIds", () => { + it("matches a leaf test name by case-insensitive substring", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['REDIRECTS']); + expect(ids).toEqual(['t2']); + expect(unmatchedFilters).toEqual([]); + }); + + it("matches all tests under a describe via the full path", () => { + const { ids } = selectTestIds(handlers, ['Login']); + expect(ids.sort()).toEqual(['t1', 't2']); + }); + + it("treats multiple filters as OR", () => { + const { ids } = selectTestIds(handlers, ['redirects', 'taken email']); + expect(ids.sort()).toEqual(['t2', 't3']); + }); + + it("matches the same substring across suites", () => { + const { ids } = 
selectTestIds(handlers, ['shows error']); + expect(ids.sort()).toEqual(['t1', 't3']); + }); + + it("reports filters that matched nothing", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['Login', 'nope']); + expect(ids.sort()).toEqual(['t1', 't2']); + expect(unmatchedFilters).toEqual(['nope']); + }); + + it("returns empty ids when nothing matches", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['zzz']); + expect(ids).toEqual([]); + expect(unmatchedFilters).toEqual(['zzz']); + }); + + it("ignores suite handlers as run targets", () => { + const { ids } = selectTestIds(handlers, ['Signup']); + expect(ids).toEqual(['t3']); // s2 (the suite) is never an id + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run tests/filterTests.test.js` +Expected: FAIL — `Failed to resolve import "../src/filterTests.js"` / `selectTestIds is not a function`. + +- [ ] **Step 3: Write minimal implementation** + +```js +// src/filterTests.js +import { buildTestPath } from './buildTestPath.js'; + +export function selectTestIds(handlers, filters) { + const needles = filters.map((f) => f.toLowerCase()); + const matchedNeedles = new Set(); + const ids = []; + + for (const handler of handlers) { + if (handler.type !== 'test') continue; + const path = buildTestPath(handler.id, handlers); + if (!path) continue; + const haystack = path.toLowerCase(); + + let matched = false; + for (let i = 0; i < needles.length; i++) { + if (haystack.includes(needles[i])) { + matched = true; + matchedNeedles.add(i); + } + } + if (matched) ids.push(handler.id); + } + + const unmatchedFilters = filters.filter((_, i) => !matchedNeedles.has(i)); + return { ids, unmatchedFilters }; +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `npx vitest run tests/filterTests.test.js` +Expected: PASS (7 passed). 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/filterTests.js tests/filterTests.test.js +git commit -m "feat: add selectTestIds matcher for test filtering" +``` + +--- + +### Task 2: `parseRunArgs` CLI argument parser + +**Files:** +- Create: `src/parseArgs.js` +- Test: `tests/parseArgs.test.js` + +**Interfaces:** +- Produces: `parseRunArgs(argv)` where `argv` is the array of tokens **after** the `run` command (i.e. `process.argv.slice(3)`). Returns `{ testFilters: string[] }`. Supports both `--test <value>` (value is the next token) and `--test=<value>` forms. Unknown tokens are ignored. + +- [ ] **Step 1: Write the failing test** + +```js +// tests/parseArgs.test.js +import { describe, it, expect } from "vitest"; +import { parseRunArgs } from "../src/parseArgs.js"; + +describe("parseRunArgs", () => { + it("returns empty filters when no args", () => { + expect(parseRunArgs([])).toEqual({ testFilters: [] }); + }); + + it("parses a single --test ", () => { + expect(parseRunArgs(['--test', 'shows error'])).toEqual({ + testFilters: ['shows error'], + }); + }); + + it("parses repeated --test flags in order", () => { + expect(parseRunArgs(['--test', 'Login', '--test', 'Signup'])).toEqual({ + testFilters: ['Login', 'Signup'], + }); + }); + + it("parses the --test= form", () => { + expect(parseRunArgs(['--test=Login'])).toEqual({ + testFilters: ['Login'], + }); + }); + + it("ignores a trailing --test with no value", () => { + expect(parseRunArgs(['--test'])).toEqual({ testFilters: [] }); + }); + + it("ignores unknown tokens", () => { + expect(parseRunArgs(['--verbose', '--test', 'Login'])).toEqual({ + testFilters: ['Login'], + }); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run tests/parseArgs.test.js` +Expected: FAIL — `Failed to resolve import "../src/parseArgs.js"`.
+ +- [ ] **Step 3: Write minimal implementation** + +```js +// src/parseArgs.js +export function parseRunArgs(argv) { + const testFilters = []; + + for (let i = 0; i < argv.length; i++) { + const token = argv[i]; + if (token === '--test') { + const value = argv[i + 1]; + if (value !== undefined) { + testFilters.push(value); + i++; + } + } else if (token.startsWith('--test=')) { + testFilters.push(token.slice('--test='.length)); + } + } + + return { testFilters }; +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `npx vitest run tests/parseArgs.test.js` +Expected: PASS (6 passed). + +- [ ] **Step 5: Commit** + +```bash +git add src/parseArgs.js tests/parseArgs.test.js +git commit -m "feat: add parseRunArgs CLI argument parser" +``` + +--- + +### Task 3: Wire filtering into `runTests` + +**Files:** +- Modify: `src/index.js` +- Test: `tests/runTests.test.js` + +**Interfaces:** +- Consumes: `selectTestIds(handlers, filters)` from `src/filterTests.js` (Task 1). +- Produces: `runTests(options = {})` now accepts `options.testFilters: string[]` (default `[]`). When non-empty it enumerates the in-browser registry, computes selected ids in Node, and the run `page.evaluate` is called as `evaluate(fn, retryCount, selectedIds)` where `selectedIds` is `string[]` (filtered) or `null` (full run). Zero matches → returns `true` and the run `evaluate` is **not** called. Coverage collection is skipped whenever `selectedIds` is non-null. 
+ +- [ ] **Step 1: Write the failing tests** + +Add these tests inside the existing `describe("runTests", …)` block in `tests/runTests.test.js` (the file's mocks and helpers from Task context are already in place): + +```js + it("passes selectedIds=null to the run evaluate when no filter", async () => { + const testStatus = [{ id: '1', status: 'pass' }]; + const handlers = [{ id: '1', name: 'test1', type: 'test' }]; + const page = createMockPage({ handlers, testStatus }); + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + + await runTests(); + + expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 2, null); + }); + + it("runs only matching tests when a --test filter is given", async () => { + const registry = [ + { id: 's1', name: 'Login', parent: undefined, type: 'suite' }, + { id: 't1', name: 'shows error', parent: 's1', type: 'test' }, + { id: 't2', name: 'redirects', parent: 's1', type: 'test' }, + ]; + const runResult = { + handlers: registry, + testStatus: [{ id: 't1', status: 'pass' }], + }; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn() + .mockResolvedValueOnce(registry) // enumeration pass + .mockResolvedValueOnce(runResult), // run pass + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + + const result = await runTests({ testFilters: ['shows error'] }); + + expect(result).toBe(false); + // second evaluate call is the run; selectedIds is the matched ids + expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']); + }); + + it("returns true and skips the run when a filter matches nothing", async () => { + const registry = [ + { id: 't1', name: 'shows error', parent: undefined, type: 'test' }, + ]; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn().mockResolvedValueOnce(registry), + }; + const 
browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + + const result = await runTests({ testFilters: ['nope'] }); + + expect(result).toBe(true); + expect(page.evaluate).toHaveBeenCalledTimes(1); // enumeration only, no run + expect(browser.close).toHaveBeenCalled(); + const errors = errorSpy.mock.calls.map((c) => String(c[0])); + expect(errors.some((e) => e.includes('No tests matched') && e.includes('nope'))).toBe(true); + errorSpy.mockRestore(); + }); + + it("skips coverage collection when a filter is active", async () => { + const registry = [ + { id: 't1', name: 'shows error', parent: undefined, type: 'test' }, + ]; + const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] }; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn() + .mockResolvedValueOnce(registry) + .mockResolvedValueOnce(runResult), + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + vi.mocked(loadConfig).mockReturnValue({ ...defaultMockConfig, coverage: true }); + + await runTests({ testFilters: ['shows error'] }); + + // only the 2 evaluate calls happened (enumeration + run); coverage would be a 3rd + expect(page.evaluate).toHaveBeenCalledTimes(2); + expect(fs.writeFileSync).not.toHaveBeenCalled(); + }); +``` + +Also update the existing assertion in `should pass retryCount to page.evaluate`: + +```js + // page.evaluate is called with (fn, retryCount, selectedIds) + expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3, null); +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run tests/runTests.test.js` +Expected: FAIL — new filter tests fail (no enumeration pass, `selectedIds` arg missing); the updated retryCount assertion fails because the current code calls `evaluate(fn, 3)` without the third arg. 
+ +- [ ] **Step 3: Add the imports and `testFilters` parameter** + +In `src/index.js`, add the import near the other `./` imports (after line 10): + +```js +import { selectTestIds } from './filterTests.js'; +``` + +Change the function signature (line 20) from: + +```js +export async function runTests() { +``` + +to: + +```js +export async function runTests(options = {}) { + const { testFilters = [] } = options; +``` + +- [ ] **Step 4: Add the enumeration + matching block before the run `page.evaluate`** + +In `src/index.js`, immediately after the `console.log('Page loaded. Starting tests...');` line (currently line 64) and before the `const { handlers, testStatus } = await page.evaluate(` block, insert: + +```js + // Resolve --test filters to a concrete set of test ids (null = run all) + let selectedIds = null; + if (testFilters.length > 0) { + const registeredHandlers = await page.evaluate(() => { + const state = window.__TWD_STATE__; + if (!state || !state.handlers) return []; + return Array.from(state.handlers.values()).map((h) => ({ + id: h.id, + name: h.name, + parent: h.parent, + type: h.type, + })); + }); + + const { ids, unmatchedFilters } = selectTestIds(registeredHandlers, testFilters); + + if (ids.length === 0) { + console.error( + `No tests matched filter(s): ${testFilters.map((f) => `"${f}"`).join(', ')}` + ); + await browser.close(); + return true; + } + + if (unmatchedFilters.length > 0) { + console.warn( + `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}` + ); + } + + selectedIds = ids; + console.log(`Filtering: running ${ids.length} test(s) matching --test filter(s).`); + } +``` + +- [ ] **Step 5: Make the run `page.evaluate` honor `selectedIds`** + +In `src/index.js`, change the run evaluate's function signature from: + +```js + const { handlers, testStatus } = await page.evaluate(async (retryCount) => { +``` + +to: + +```js + const { handlers, testStatus } = await page.evaluate(async (retryCount, selectedIds) => { 
+``` + +Change the run call from: + +```js + const handlers = await runner.runAll(); +``` + +to: + +```js + const handlers = selectedIds + ? await runner.runByIds(selectedIds) + : await runner.runAll(); +``` + +And change the trailing args of that `page.evaluate(...)` call from: + +```js + }, config.retryCount); +``` + +to: + +```js + }, config.retryCount, selectedIds); +``` + +- [ ] **Step 6: Skip coverage when filtering** + +In `src/index.js`, find the coverage block: + +```js + // Handle code coverage if enabled + if (config.coverage && !hasFailures) { +``` + +Replace those two lines with this (adds a skip-log for filtered runs, then adds `&& !selectedIds` to the guard): + +```js + // Handle code coverage if enabled (skipped when a --test filter is active) + if (selectedIds && config.coverage) { + console.log('Skipping coverage collection (test filter active).'); + } + if (config.coverage && !hasFailures && !selectedIds) { +``` + +- [ ] **Step 7: Run tests to verify they pass** + +Run: `npx vitest run tests/runTests.test.js` +Expected: PASS (all existing + 4 new filter tests). + +- [ ] **Step 8: Run the full suite** + +Run: `npm run test:ci` +Expected: PASS — all test files green. + +- [ ] **Step 9: Commit** + +```bash +git add src/index.js tests/runTests.test.js +git commit -m "feat: filter tests via --test in runTests" +``` + +--- + +### Task 4: Wire the `--test` flag into the CLI entry point + +**Files:** +- Modify: `bin/twd-cli.js` + +**Interfaces:** +- Consumes: `parseRunArgs(argv)` from `src/parseArgs.js` (Task 2) and `runTests(options)` from `src/index.js` (Task 3). 
+ +- [ ] **Step 1: Update `bin/twd-cli.js`** + +Replace the entire contents of `bin/twd-cli.js` with: + +```js +#!/usr/bin/env node + +import { runTests } from '../src/index.js'; +import { parseRunArgs } from '../src/parseArgs.js'; + +const command = process.argv[2]; + +if (command === 'run') { + try { + const { testFilters } = parseRunArgs(process.argv.slice(3)); + const hasFailures = await runTests({ testFilters }); + process.exit(hasFailures ? 1 : 0); + } catch (error) { + process.exit(1); + } +} else { + console.log(` +twd-cli - Test runner for TWD tests + +Usage: + npx twd-cli run Run all tests + npx twd-cli run --test "<value>" Run only tests whose "suite > test" path + contains <value> (case-insensitive). + Repeatable; multiple --test values are OR'd. + +Examples: + npx twd-cli run --test "shows error" + npx twd-cli run --test "Login" --test "Signup" + +Options: + Create a twd.config.json file in your project root to customize settings. + `); + process.exit(command ? 1 : 0); +} +``` + +- [ ] **Step 2: Verify the help text manually** + +Run: `node ./bin/twd-cli.js` +Expected: help text prints and includes the `--test "<value>"` usage and the two examples; process exits 0 (no command given). + +- [ ] **Step 3: Verify arg parsing reaches runTests (no dev server needed)** + +Run: `node ./bin/twd-cli.js run --test "definitely-not-a-real-test-xyz"` +Expected: the runner starts, fails to connect to the dev server (Puppeteer navigation/selector error), and the process exits 1. This confirms the `run` path executes with filters wired in. (A full green-path check happens in Task 5's manual run / existing CI usage.)
+ +- [ ] **Step 4: Commit** + +```bash +git add bin/twd-cli.js +git commit -m "feat: parse --test flag in CLI entry point" +``` + +--- + +### Task 5: Document `--test` in the README + +**Files:** +- Modify: `README.md` + +- [ ] **Step 1: Add a "Filtering tests" subsection** + +In `README.md`, directly after the `### Basic Usage` section (before `### Configuration`), insert: + +```markdown +### Filtering tests + +Run only a subset of tests with the repeatable `--test` flag. Matching is +**case-insensitive** and matches a **substring** of each test's full +`"Suite > test name"` path: + +```bash +# Run every test whose name contains "shows error" +npx twd-cli run --test "shows error" + +# Because matching uses the full "suite > test" path, passing a describe +# name runs every test inside that describe block: +npx twd-cli run --test "Login" + +# Multiple --test flags are combined with OR (a test runs if it matches any): +npx twd-cli run --test "Login" --test "Signup" +``` + +Notes: + +- If no test matches any filter, the run exits with code `1` and prints + `No tests matched filter(s): …` — so a typo won't silently look like a pass. +- Code coverage collection is skipped while a `--test` filter is active, since a + filtered run is a partial (debug) run. +``` + +- [ ] **Step 2: Verify the README renders** + +Run: `grep -n "Filtering tests" README.md` +Expected: prints the new heading line. + +- [ ] **Step 3: Commit** + +```bash +git add README.md +git commit -m "docs: document --test filter flag" +``` + +--- + +## Self-Review Notes + +- **Spec coverage:** repeatable `--test` (Task 2, 4) ✓; case-insensitive substring on full path incl. describe (Task 1) ✓; OR semantics (Task 1) ✓; zero-match exits 1 (Task 3) ✓; `runByIds` selective run (Task 3) ✓; coverage skipped under filter (Task 3) ✓; `buildTestPath` reuse (Task 1) ✓; README "Filtering tests" (Task 5) ✓; no deps / branch discipline (Global Constraints) ✓. 
+- **Type consistency:** `selectTestIds(handlers, filters) -> { ids, unmatchedFilters }` used identically in Task 1 and Task 3; `parseRunArgs(argv) -> { testFilters }` used identically in Task 2 and Task 4; `runTests({ testFilters })` consistent across Tasks 3–4; the run `page.evaluate(fn, retryCount, selectedIds)` shape is consistent across the Task 3 steps and tests. +- **Placeholders:** none — every code/test step is complete. From c4a1b4660ae152380303fa0f9e7a2c6527e16e70 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:41:13 +0200 Subject: [PATCH 3/9] feat: add selectTestIds matcher for test filtering --- src/filterTests.js | 26 ++++++++++++++++++++ tests/filterTests.test.js | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 src/filterTests.js create mode 100644 tests/filterTests.test.js diff --git a/src/filterTests.js b/src/filterTests.js new file mode 100644 index 0000000..d423182 --- /dev/null +++ b/src/filterTests.js @@ -0,0 +1,26 @@ +import { buildTestPath } from './buildTestPath.js'; + +export function selectTestIds(handlers, filters) { + const needles = filters.map((f) => f.toLowerCase()); + const matchedNeedles = new Set(); + const ids = []; + + for (const handler of handlers) { + if (handler.type !== 'test') continue; + const path = buildTestPath(handler.id, handlers); + if (!path) continue; + const haystack = path.toLowerCase(); + + let matched = false; + for (let i = 0; i < needles.length; i++) { + if (haystack.includes(needles[i])) { + matched = true; + matchedNeedles.add(i); + } + } + if (matched) ids.push(handler.id); + } + + const unmatchedFilters = filters.filter((_, i) => !matchedNeedles.has(i)); + return { ids, unmatchedFilters }; +} diff --git a/tests/filterTests.test.js b/tests/filterTests.test.js new file mode 100644 index 0000000..bfc2c11 --- /dev/null +++ b/tests/filterTests.test.js @@ -0,0 +1,50 @@ +import { describe, it, expect } from "vitest"; +import { selectTestIds } 
from "../src/filterTests.js"; + +const handlers = [ + { id: 's1', name: 'Login', parent: undefined, type: 'suite' }, + { id: 't1', name: 'shows error on bad password', parent: 's1', type: 'test' }, + { id: 't2', name: 'redirects on success', parent: 's1', type: 'test' }, + { id: 's2', name: 'Signup', parent: undefined, type: 'suite' }, + { id: 't3', name: 'shows error on taken email', parent: 's2', type: 'test' }, +]; + +describe("selectTestIds", () => { + it("matches a leaf test name by case-insensitive substring", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['REDIRECTS']); + expect(ids).toEqual(['t2']); + expect(unmatchedFilters).toEqual([]); + }); + + it("matches all tests under a describe via the full path", () => { + const { ids } = selectTestIds(handlers, ['Login']); + expect(ids.sort()).toEqual(['t1', 't2']); + }); + + it("treats multiple filters as OR", () => { + const { ids } = selectTestIds(handlers, ['redirects', 'taken email']); + expect(ids.sort()).toEqual(['t2', 't3']); + }); + + it("matches the same substring across suites", () => { + const { ids } = selectTestIds(handlers, ['shows error']); + expect(ids.sort()).toEqual(['t1', 't3']); + }); + + it("reports filters that matched nothing", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['Login', 'nope']); + expect(ids.sort()).toEqual(['t1', 't2']); + expect(unmatchedFilters).toEqual(['nope']); + }); + + it("returns empty ids when nothing matches", () => { + const { ids, unmatchedFilters } = selectTestIds(handlers, ['zzz']); + expect(ids).toEqual([]); + expect(unmatchedFilters).toEqual(['zzz']); + }); + + it("ignores suite handlers as run targets", () => { + const { ids } = selectTestIds(handlers, ['Signup']); + expect(ids).toEqual(['t3']); // s2 (the suite) is never an id + }); +}); From 0c2d0851b1543db67a328ca2772f334c6d92a34f Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:42:28 +0200 Subject: [PATCH 4/9] feat: add parseRunArgs CLI 
argument parser --- src/parseArgs.js | 18 ++++++++++++++++++ tests/parseArgs.test.js | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 src/parseArgs.js create mode 100644 tests/parseArgs.test.js diff --git a/src/parseArgs.js b/src/parseArgs.js new file mode 100644 index 0000000..8b73f91 --- /dev/null +++ b/src/parseArgs.js @@ -0,0 +1,18 @@ +export function parseRunArgs(argv) { + const testFilters = []; + + for (let i = 0; i < argv.length; i++) { + const token = argv[i]; + if (token === '--test') { + const value = argv[i + 1]; + if (value !== undefined) { + testFilters.push(value); + i++; + } + } else if (token.startsWith('--test=')) { + testFilters.push(token.slice('--test='.length)); + } + } + + return { testFilters }; +} diff --git a/tests/parseArgs.test.js b/tests/parseArgs.test.js new file mode 100644 index 0000000..6c9a33f --- /dev/null +++ b/tests/parseArgs.test.js @@ -0,0 +1,36 @@ +import { describe, it, expect } from "vitest"; +import { parseRunArgs } from "../src/parseArgs.js"; + +describe("parseRunArgs", () => { + it("returns empty filters when no args", () => { + expect(parseRunArgs([])).toEqual({ testFilters: [] }); + }); + + it("parses a single --test ", () => { + expect(parseRunArgs(['--test', 'shows error'])).toEqual({ + testFilters: ['shows error'], + }); + }); + + it("parses repeated --test flags in order", () => { + expect(parseRunArgs(['--test', 'Login', '--test', 'Signup'])).toEqual({ + testFilters: ['Login', 'Signup'], + }); + }); + + it("parses the --test= form", () => { + expect(parseRunArgs(['--test=Login'])).toEqual({ + testFilters: ['Login'], + }); + }); + + it("ignores a trailing --test with no value", () => { + expect(parseRunArgs(['--test'])).toEqual({ testFilters: [] }); + }); + + it("ignores unknown tokens", () => { + expect(parseRunArgs(['--verbose', '--test', 'Login'])).toEqual({ + testFilters: ['Login'], + }); + }); +}); From bfc0943d5836174c7cfc8b3e31022b428ae78a43 Mon Sep 17 00:00:00 
2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:45:40 +0200 Subject: [PATCH 5/9] feat: filter tests via --test in runTests --- src/index.js | 53 +++++++++++++++++++++--- tests/runTests.test.js | 92 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 8 deletions(-) diff --git a/src/index.js b/src/index.js index dc2c5ae..81c69a9 100644 --- a/src/index.js +++ b/src/index.js @@ -8,6 +8,7 @@ import { printContractReport } from './contractReport.js'; import { generateContractMarkdown } from './contractMarkdown.js'; import { buildTestPath } from './buildTestPath.js'; import { formatTestSummary, formatFailedTestsBlock } from './testSummary.js'; +import { selectTestIds } from './filterTests.js'; function isProtocolTimeout(error) { const message = error && error.message ? error.message : ''; @@ -17,7 +18,8 @@ function isProtocolTimeout(error) { ); } -export async function runTests() { +export async function runTests(options = {}) { + const { testFilters = [] } = options; let browser; try { const config = loadConfig(); @@ -63,8 +65,42 @@ export async function runTests() { await page.waitForSelector('#twd-sidebar-root', { timeout: config.timeout }); console.log('Page loaded. 
Starting tests...'); + // Resolve --test filters to a concrete set of test ids (null = run all) + let selectedIds = null; + if (testFilters.length > 0) { + const registeredHandlers = await page.evaluate(() => { + const state = window.__TWD_STATE__; + if (!state || !state.handlers) return []; + return Array.from(state.handlers.values()).map((h) => ({ + id: h.id, + name: h.name, + parent: h.parent, + type: h.type, + })); + }); + + const { ids, unmatchedFilters } = selectTestIds(registeredHandlers, testFilters); + + if (ids.length === 0) { + console.error( + `No tests matched filter(s): ${testFilters.map((f) => `"${f}"`).join(', ')}` + ); + await browser.close(); + return true; + } + + if (unmatchedFilters.length > 0) { + console.warn( + `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}` + ); + } + + selectedIds = ids; + console.log(`Filtering: running ${ids.length} test(s) matching --test filter(s).`); + } + // Execute all tests - const { handlers, testStatus } = await page.evaluate(async (retryCount) => { + const { handlers, testStatus } = await page.evaluate(async (retryCount, selectedIds) => { const TestRunner = window.__testRunner; const testStatus = []; const runner = new TestRunner({ @@ -86,9 +122,11 @@ export async function runTests() { testStatus.push({ id: test.id, status: "skip" }); }, }, { retryCount }); - const handlers = await runner.runAll(); + const handlers = selectedIds + ? 
await runner.runByIds(selectedIds) + : await runner.runAll(); return { handlers: Array.from(handlers.values()), testStatus }; - }, config.retryCount); + }, config.retryCount, selectedIds); const durationMs = Date.now() - startedAt; @@ -143,8 +181,11 @@ export async function runTests() { } } - // Handle code coverage if enabled - if (config.coverage && !hasFailures) { + // Handle code coverage if enabled (skipped when a --test filter is active) + if (selectedIds && config.coverage) { + console.log('Skipping coverage collection (test filter active).'); + } + if (config.coverage && !hasFailures && !selectedIds) { const coverage = await page.evaluate(() => window.__coverage__); if (coverage) { console.log('Collecting code coverage data...'); diff --git a/tests/runTests.test.js b/tests/runTests.test.js index 960fc64..d88cd5c 100644 --- a/tests/runTests.test.js +++ b/tests/runTests.test.js @@ -77,8 +77,8 @@ describe("runTests", () => { await runTests(); - // page.evaluate is called with (fn, retryCount) - expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3); + // page.evaluate is called with (fn, retryCount, selectedIds) + expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3, null); }); it("should pass protocolTimeout to puppeteer.launch", async () => { @@ -278,6 +278,94 @@ describe("runTests", () => { expect(entries[0].occurrence).toBe(1); }); + it("passes selectedIds=null to the run evaluate when no filter", async () => { + const testStatus = [{ id: '1', status: 'pass' }]; + const handlers = [{ id: '1', name: 'test1', type: 'test' }]; + const page = createMockPage({ handlers, testStatus }); + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + + await runTests(); + + expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 2, null); + }); + + it("runs only matching tests when a --test filter is given", async () => { + const registry = [ + { id: 's1', name: 'Login', parent: undefined, 
type: 'suite' }, + { id: 't1', name: 'shows error', parent: 's1', type: 'test' }, + { id: 't2', name: 'redirects', parent: 's1', type: 'test' }, + ]; + const runResult = { + handlers: registry, + testStatus: [{ id: 't1', status: 'pass' }], + }; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn() + .mockResolvedValueOnce(registry) // enumeration pass + .mockResolvedValueOnce(runResult), // run pass + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + + const result = await runTests({ testFilters: ['shows error'] }); + + expect(result).toBe(false); + // second evaluate call is the run; selectedIds is the matched ids + expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']); + }); + + it("returns true and skips the run when a filter matches nothing", async () => { + const registry = [ + { id: 't1', name: 'shows error', parent: undefined, type: 'test' }, + ]; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn().mockResolvedValueOnce(registry), + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + + const result = await runTests({ testFilters: ['nope'] }); + + expect(result).toBe(true); + expect(page.evaluate).toHaveBeenCalledTimes(1); // enumeration only, no run + expect(browser.close).toHaveBeenCalled(); + const errors = errorSpy.mock.calls.map((c) => String(c[0])); + expect(errors.some((e) => e.includes('No tests matched') && e.includes('nope'))).toBe(true); + errorSpy.mockRestore(); + }); + + it("skips coverage collection when a filter is active", async () => { + const registry = [ + { id: 't1', name: 'shows error', parent: undefined, type: 'test' }, + ]; + const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] }; + 
const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn() + .mockResolvedValueOnce(registry) + .mockResolvedValueOnce(runResult), + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + vi.mocked(loadConfig).mockReturnValue({ ...defaultMockConfig, coverage: true }); + + await runTests({ testFilters: ['shows error'] }); + + // only the 2 evaluate calls happened (enumeration + run); coverage would be a 3rd + expect(page.evaluate).toHaveBeenCalledTimes(2); + expect(fs.writeFileSync).not.toHaveBeenCalled(); + }); + it("should print the Tests: summary line and Failed tests block", async () => { const testStatus = [ { id: '1', status: 'pass' }, From f5b191858d6325b8f55371fe5b28c3425e938902 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:48:57 +0200 Subject: [PATCH 6/9] feat: parse --test flag in CLI entry point --- bin/twd-cli.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/twd-cli.js b/bin/twd-cli.js index 2c2b6c7..914fbd8 100755 --- a/bin/twd-cli.js +++ b/bin/twd-cli.js @@ -1,12 +1,14 @@ #!/usr/bin/env node import { runTests } from '../src/index.js'; +import { parseRunArgs } from '../src/parseArgs.js'; const command = process.argv[2]; if (command === 'run') { try { - const hasFailures = await runTests(); + const { testFilters } = parseRunArgs(process.argv.slice(3)); + const hasFailures = await runTests({ testFilters }); process.exit(hasFailures ? 1 : 0); } catch (error) { process.exit(1); @@ -16,7 +18,14 @@ if (command === 'run') { twd-cli - Test runner for TWD tests Usage: - npx twd-cli run Run all tests + npx twd-cli run Run all tests + npx twd-cli run --test "" Run only tests whose "suite > test" path + contains (case-insensitive). + Repeatable; multiple --test values are OR'd. 
+ +Examples: + npx twd-cli run --test "shows error" + npx twd-cli run --test "Login" --test "Signup" Options: Create a twd.config.json file in your project root to customize settings. From 12322c8c93f5ca917fce54268053ec0a2fc04b25 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:50:16 +0200 Subject: [PATCH 7/9] docs: document --test filter flag --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 181ca20..5fee22e 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,31 @@ Run tests with default configuration: npx twd-cli run ``` +### Filtering tests + +Run only a subset of tests with the repeatable `--test` flag. Matching is +**case-insensitive** and matches a **substring** of each test's full +`"Suite > test name"` path: + +```bash +# Run every test whose name contains "shows error" +npx twd-cli run --test "shows error" + +# Because matching uses the full "suite > test" path, passing a describe +# name runs every test inside that describe block: +npx twd-cli run --test "Login" + +# Multiple --test flags are combined with OR (a test runs if it matches any): +npx twd-cli run --test "Login" --test "Signup" +``` + +Notes: + +- If no test matches any filter, the run exits with code `1` and prints + `No tests matched filter(s): …` — so a typo won't silently look like a pass. +- Code coverage collection is skipped while a `--test` filter is active, since a + filtered run is a partial (debug) run. 
+ ### Configuration Create a `twd.config.json` file in your project root: From 966967b56a8d5bbb693d4ec78fac6f9a79f0c40d Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Wed, 24 Jun 2026 22:53:05 +0200 Subject: [PATCH 8/9] refactor: clarify partial-match filter warning and cover it with a test --- src/index.js | 2 +- tests/runTests.test.js | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/index.js b/src/index.js index 81c69a9..da40a61 100644 --- a/src/index.js +++ b/src/index.js @@ -91,7 +91,7 @@ export async function runTests(options = {}) { if (unmatchedFilters.length > 0) { console.warn( - `Warning: no tests matched: ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}` + `Warning: these filter(s) matched no tests (others did): ${unmatchedFilters.map((f) => `"${f}"`).join(', ')}` ); } diff --git a/tests/runTests.test.js b/tests/runTests.test.js index d88cd5c..d7f9c59 100644 --- a/tests/runTests.test.js +++ b/tests/runTests.test.js @@ -342,6 +342,34 @@ describe("runTests", () => { errorSpy.mockRestore(); }); + it("warns about filters that matched nothing on a partial match", async () => { + const registry = [ + { id: 's1', name: 'Login', parent: undefined, type: 'suite' }, + { id: 't1', name: 'shows error', parent: 's1', type: 'test' }, + ]; + const runResult = { handlers: registry, testStatus: [{ id: 't1', status: 'pass' }] }; + const page = { + goto: vi.fn(), + waitForSelector: vi.fn(), + exposeFunction: vi.fn(), + evaluate: vi.fn() + .mockResolvedValueOnce(registry) + .mockResolvedValueOnce(runResult), + }; + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const result = await runTests({ testFilters: ['Login', 'nope'] }); + + expect(result).toBe(false); + expect(page.evaluate).toHaveBeenNthCalledWith(2, expect.any(Function), 2, ['t1']); + const warnings = warnSpy.mock.calls.map((c) => 
String(c[0])); + expect(warnings.some((w) => w.includes('matched no tests') && w.includes('nope'))).toBe(true); + expect(warnings.some((w) => w.includes('"Login"'))).toBe(false); + warnSpy.mockRestore(); + }); + it("skips coverage collection when a filter is active", async () => { const registry = [ { id: 't1', name: 'shows error', parent: undefined, type: 'test' }, From 477e3d6617dec72fb686c1b66224aa6c59565cf4 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Thu, 25 Jun 2026 23:19:35 +0200 Subject: [PATCH 9/9] chore: re-trigger PR checks