diff --git a/.claude/skills/click/SKILL.md b/.claude/skills/click/SKILL.md new file mode 100644 index 000000000..eb226002d --- /dev/null +++ b/.claude/skills/click/SKILL.md @@ -0,0 +1,41 @@ +--- +name: click +description: Best practices for building CLI applications with Click including commands, groups, options, and testing. +--- + +# Skill: Click + +Best practices for building CLI applications with Click including commands, groups, options, and testing. + +## When to Use + +Apply this skill when building command-line interfaces with Click — commands, groups, options, arguments, and prompts. + +## Commands + +- Use `@click.command()` for single commands, `@click.group()` for multi-command CLIs. +- Declare options with `@click.option()` and positional args with `@click.argument()`. +- Use `help=` on every option and command for auto-generated help text. +- Use `envvar=` to allow environment variable fallback for sensitive options. + +## Groups + +- Organize subcommands with `@click.group()` and `group.add_command()`. +- Use `@click.pass_context` to share state between group and subcommands. + +## Type Safety + +- Use Click's built-in types (`click.Path(exists=True)`, `click.Choice([...])`, `click.IntRange()`). +- Use callbacks for custom validation. + +## Testing + +- Use `click.testing.CliRunner()` for testing commands without subprocess overhead. +- Assert on `result.exit_code` and `result.output`. +- Use `mix_stderr=False` to test stderr separately. + +## Pitfalls + +- Don't use `sys.exit()` — use `click.exceptions.Exit` or return from the command. +- Don't use `print()` — use `click.echo()` for proper encoding handling. +- Always handle `KeyboardInterrupt` / abort prompts gracefully. diff --git a/.claude/skills/django/SKILL.md b/.claude/skills/django/SKILL.md new file mode 100644 index 000000000..1dba24ced --- /dev/null +++ b/.claude/skills/django/SKILL.md @@ -0,0 +1,47 @@ +--- +name: django +description: Best practices for Django web development including models, views, templates, and testing. +--- + +# Skill: Django + +Best practices for Django web development including models, views, templates, and testing. + +## When to Use + +Apply this skill when working with Django projects — models, views, URL routing, templates, forms, admin, and management commands. + +## Project Structure + +- Follow the standard Django app layout: `models.py`, `views.py`, `urls.py`, `admin.py`, `tests.py`, `forms.py`. +- Keep each app focused on a single domain concept; avoid "god apps" with unrelated models. +- Use `settings/base.py`, `settings/dev.py`, `settings/prod.py` for environment-specific configuration. + +## Models + +- Always define `__str__` on models for admin and debugging readability. +- Use `Meta.ordering` sparingly — it adds `ORDER BY` to every query. Prefer explicit `.order_by()` on querysets. +- Use database indexes (`db_index=True`, `Meta.indexes`) for fields that appear in `filter()` / `order_by()`. +- Prefer `CharField` with `choices` (or `TextChoices` / `IntegerChoices`) over bare strings for constrained fields. +- Use `F()` expressions and `Q()` objects for complex queries to avoid race conditions and improve readability. + +## Views + +- Prefer class-based views (CBVs) for CRUD; prefer function-based views for one-off logic. +- Always explicitly set `queryset` or override `get_queryset()` — never rely on mutable class-level state. +- Use `select_related()` and `prefetch_related()` to avoid N+1 query problems. +- Set `LOGIN_URL` and use `@login_required` / `LoginRequiredMixin` consistently. + +## Testing + +- Use `pytest-django` with `@pytest.mark.django_db` for database access. +- Prefer `TestCase` or `TransactionTestCase` only when explicit transaction control is needed; otherwise use pytest fixtures. +- Use `RequestFactory` or `Client` to test views without starting a server. +- Use `baker.make()` (model-bakery) or factories instead of manual model construction in tests. + +## Pitfalls + +- Never do blocking I/O in async views without wrapping in `sync_to_async`. +- Avoid importing models at module level in `settings.py` or `urls.py` (circular imports). +- Never store secrets in `settings.py` — use environment variables. +- Avoid raw SQL unless the ORM genuinely cannot express the query. diff --git a/.claude/skills/flask/SKILL.md b/.claude/skills/flask/SKILL.md new file mode 100644 index 000000000..3f540d3eb --- /dev/null +++ b/.claude/skills/flask/SKILL.md @@ -0,0 +1,41 @@ +--- +name: flask +description: Best practices for Flask web development including routing, blueprints, and testing. +--- + +# Skill: Flask + +Best practices for Flask web development including routing, blueprints, and testing. + +## When to Use + +Apply this skill when building Flask web applications or APIs — routing, blueprints, extensions, and testing. + +## Project Structure + +- Use the application factory pattern (`create_app()`) to avoid global state and enable testing. +- Organize features into Blueprints; register them in the factory. +- Keep configuration in a `config.py` with classes like `DevelopmentConfig`, `ProductionConfig`. + +## Routing and Views + +- Prefer explicit HTTP method decorators (`@app.get`, `@app.post`) over generic `@app.route` with `methods=[...]`. +- Validate request data early; return 400 errors for malformed input before processing. +- Use `flask.abort()` with appropriate HTTP codes rather than returning error responses manually. + +## Extensions + +- Initialize extensions lazily with `ext.init_app(app)` inside the factory, not at module level. +- Common extensions: Flask-SQLAlchemy, Flask-Migrate, Flask-Login, Flask-WTF, Flask-CORS. + +## Testing + +- Use `app.test_client()` for HTTP-level tests and `app.test_request_context()` for unit tests. +- Use pytest fixtures to create the app and client; scope appropriately (`session` for the app, `function` for the client). +- Set `TESTING=True` and use a separate test database. + +## Pitfalls + +- Never use the development server (`app.run()`) in production — use Gunicorn or uWSGI. +- Avoid storing mutable state on the `app` object; use `g` for request-scoped data. +- Never hardcode `SECRET_KEY` — load from environment variables. diff --git a/.claude/skills/jinja2/SKILL.md b/.claude/skills/jinja2/SKILL.md new file mode 100644 index 000000000..57d8c0ae2 --- /dev/null +++ b/.claude/skills/jinja2/SKILL.md @@ -0,0 +1,37 @@ +--- +name: jinja2 +description: Best practices for template rendering with Jinja2 including environments, filters, autoescaping, and security. +--- + +# Skill: Jinja2 + +Best practices for template rendering with Jinja2 including environments, filters, autoescaping, and security. + +## When to Use + +Apply this skill when rendering templates with Jinja2 — HTML pages, emails, configuration files, and code generation. + +## Environment + +- Create a `jinja2.Environment(loader=..., autoescape=...)` once and reuse it. +- Use `FileSystemLoader` for file-based templates, `PackageLoader` for installed packages. +- Enable `autoescape=True` for HTML templates to prevent XSS. + +## Templates + +- Use `{{ variable }}` for output, `{% if/for/block %}` for control flow. +- Use template inheritance (`{% extends 'base.html' %}`) for layout reuse. +- Define custom filters for reusable transformations. + +## Security + +- **Always** enable `autoescape=True` when rendering HTML. +- Use `SandboxedEnvironment` for untrusted templates. +- Never render user input as template code — only as template data. +- Use `|e` filter explicitly when autoescape is off. + +## Pitfalls + +- Don't use `Template(string)` directly — it bypasses the environment's loader and settings. +- Watch for undefined variable errors — use `undefined=StrictUndefined` during development. +- Avoid complex logic in templates — keep them focused on presentation. diff --git a/.claude/skills/numpy/SKILL.md b/.claude/skills/numpy/SKILL.md new file mode 100644 index 000000000..bf663de9b --- /dev/null +++ b/.claude/skills/numpy/SKILL.md @@ -0,0 +1,41 @@ +--- +name: numpy +description: Best practices for numerical computing with NumPy including arrays, broadcasting, and vectorization. +--- + +# Skill: NumPy + +Best practices for numerical computing with NumPy including arrays, broadcasting, and vectorization. + +## When to Use + +Apply this skill when doing numerical computing with NumPy — arrays, broadcasting, linear algebra, random sampling. + +## Arrays + +- Use explicit dtypes (`np.float64`, `np.int32`) when creating arrays. +- Prefer `np.zeros`, `np.ones`, `np.empty`, `np.arange`, `np.linspace` over list-based construction. +- Use structured arrays or separate arrays instead of object arrays. + +## Vectorization + +- Replace Python loops with vectorized NumPy operations wherever possible. +- Use broadcasting rules to operate on arrays of different shapes without explicit expansion. +- Use `np.where()` for conditional element-wise operations. + +## Memory + +- Use `np.float32` instead of `np.float64` when precision is not critical to halve memory. +- Use views (`reshape`, slicing) instead of copies when data doesn't need mutation. +- Use `np.memmap` for arrays too large to fit in RAM. + +## Random + +- Use `np.random.default_rng(seed)` (new Generator API) instead of `np.random.seed()`. +- Always seed random generators in tests for reproducibility. + +## Pitfalls + +- Don't compare floats with `==`; use `np.allclose()` or `np.isclose()`. +- Beware of silent integer overflow in integer arrays. +- Avoid `np.matrix` — it's deprecated; use 2D `np.ndarray`. diff --git a/.claude/skills/pytest/SKILL.md b/.claude/skills/pytest/SKILL.md new file mode 100644 index 000000000..3a1f0d406 --- /dev/null +++ b/.claude/skills/pytest/SKILL.md @@ -0,0 +1,48 @@ +--- +name: pytest +description: Best practices for writing and organizing tests with pytest including fixtures, parametrize, and plugins. +--- + +# Skill: pytest + +Best practices for writing and organizing tests with pytest including fixtures, parametrize, and plugins. + +## When to Use + +Apply this skill when writing and organizing tests with pytest — fixtures, parametrize, markers, plugins, and test structure. + +## Test Organization + +- Place tests in a `tests/` directory mirroring the source structure. +- Name test files `test_.py` and test functions `test_()`. +- Group related tests in classes only when they share fixtures/setup. + +## Fixtures + +- Define fixtures at the narrowest scope needed (`function` > `class` > `module` > `session`). +- Use `conftest.py` for shared fixtures; put it at the appropriate directory level. +- Prefer factory fixtures over complex fixture inheritance. +- Use `yield` fixtures for setup/teardown; prefer `tmp_path` over `tempfile`. + +## Parametrize + +- Use `@pytest.mark.parametrize` for data-driven tests with multiple inputs. +- Give test IDs (`ids=...`) for readable test output. +- Combine `parametrize` with fixtures for cross-product testing. + +## Assertions + +- Use plain `assert` statements — pytest rewrites them for clear failure messages. +- Use `pytest.raises(ExceptionType, match=...)` for exception testing. +- Use `pytest.approx()` for floating-point comparisons. + +## Plugins + +- Common plugins: `pytest-cov`, `pytest-mock`, `pytest-asyncio`, `pytest-xdist`, `pytest-timeout`. +- Use `pytest-mock`'s `mocker` fixture over raw `unittest.mock.patch`. + +## Pitfalls + +- Don't use `session`-scoped fixtures for mutable state. +- Don't assert on implementation details — test observable behavior. +- Avoid test interdependence; each test should be runnable in isolation. diff --git a/.claude/skills/requests/SKILL.md b/.claude/skills/requests/SKILL.md new file mode 100644 index 000000000..dcc9726df --- /dev/null +++ b/.claude/skills/requests/SKILL.md @@ -0,0 +1,41 @@ +--- +name: requests +description: Best practices for HTTP client usage with Requests including sessions, error handling, and timeouts. +--- + +# Skill: Requests + +Best practices for HTTP client usage with Requests including sessions, error handling, and timeouts. + +## When to Use + +Apply this skill when making HTTP requests with the Requests library — sessions, auth, error handling, retries, and file uploads. + +## Sessions + +- Use `requests.Session()` for connection pooling and persistent headers/cookies across multiple requests. +- Configure `session.headers` for default auth tokens and user-agent strings. +- Use `session.mount()` with `HTTPAdapter` for retry logic. + +## Error Handling + +- Always call `response.raise_for_status()` to surface HTTP errors as exceptions. +- Always set `timeout=(connect_timeout, read_timeout)` — never use infinite timeouts. +- Handle `requests.ConnectionError`, `requests.Timeout`, and `requests.HTTPError` explicitly. + +## Retries + +- Use `urllib3.util.Retry` with `HTTPAdapter` for automatic retries with backoff. +- Configure status-based retries for transient errors (429, 500, 502, 503, 504). + +## Security + +- Never disable SSL verification (`verify=False`) in production. +- Pass credentials via environment variables, not hardcoded strings. +- Use `auth=` parameter for HTTP auth rather than manually setting headers. + +## Pitfalls + +- Don't forget timeouts — they default to None (infinite wait). +- Don't use `requests.get()` for high-throughput — use sessions. +- Close responses from streaming requests (`stream=True`) to release connections. diff --git a/.github/agents/PythonSelfImproving.agent.md b/.github/agents/PythonSelfImproving.agent.md new file mode 100644 index 000000000..feba73b40 --- /dev/null +++ b/.github/agents/PythonSelfImproving.agent.md @@ -0,0 +1,130 @@ +--- +description: "Self-improving Python orchestrator. Drives tasks through adversarial planning, implementation, testing, and review loops, and can propose bounded updates to its own configuration." +name: "PythonSelfImproving" +tools: [vscode/getProjectSetupInfo, vscode/installExtension, vscode/memory, vscode/newWorkspace, vscode/resolveMemoryFileUri, vscode/runCommand, vscode/vscodeAPI, vscode/extensions, vscode/askQuestions, execute/runNotebookCell, execute/testFailure, execute/getTerminalOutput, execute/awaitTerminal, execute/killTerminal, execute/createAndRunTask, execute/runInTerminal, execute/runTests, read/getNotebookSummary, read/problems, read/readFile, read/viewImage, read/readNotebookCellOutput, read/terminalSelection, read/terminalLastCommand, agent/runSubagent, edit/createDirectory, edit/createFile, edit/createJupyterNotebook, edit/editFiles, edit/editNotebook, edit/rename, search/changes, search/codebase, search/fileSearch, search/listDirectory, search/searchResults, search/textSearch, search/usages, web/fetch, web/githubRepo, browser/openBrowserPage, github.vscode-pull-request-github/issue_fetch, github.vscode-pull-request-github/labels_fetch, github.vscode-pull-request-github/notification_fetch, github.vscode-pull-request-github/doSearch, github.vscode-pull-request-github/activePullRequest, github.vscode-pull-request-github/pullRequestStatusChecks, github.vscode-pull-request-github/openPullRequest, ms-azuretools.vscode-containers/containerToolsConfig, ms-python.python/getPythonEnvironmentInfo, ms-python.python/getPythonExecutableCommand, ms-python.python/installPythonPackage, ms-python.python/configurePythonEnvironment, todo] +user-invocable: true +--- + +# PythonSelfImproving Agent + +## Mission + +Drive each user request through four adversarial loops and synthesize a high-confidence outcome. +After completing a task, optionally propose bounded self-improvements to this agent's configuration files. + +## Execution Order + +1. **Planner loop**: produce a written plan artifact before code changes. +2. **Implementer loop**: apply minimal, correct changes based on the approved plan. +3. **Tester loop**: verify behavior and probe failure modes. +4. **Review loop**: judge release readiness and decide whether limited rework is necessary. + +## Loop Contract + +For each loop: + +1. Gather viewpoint outputs from that loop's subagents. +2. Let the loop synthesizer reconcile conflicts. +3. Emit one concise loop result with: + - Decisions made. + - Risks accepted. + - Next actions. + +## Loop Invocation Protocol + +Execute loops sequentially. Each loop must receive the prior loop artifact as input. + +1. Planner loop input: + - User goal and constraints. + - Relevant repo context and known unknowns. + - Output artifact: `plan.md`. +2. Implementer loop input: + - `plan.md`. + - Any new evidence discovered while implementing. + - Output artifact: `implementation-summary.md`. + - Implementation guidance: follow `.github/instructions/python-best-practices.instructions.md` for all Python code. +3. Tester loop input: + - `plan.md`. + - `implementation-summary.md`. + - Output artifact: `test-summary.md`. +4. Review loop input: + - `plan.md`. + - `implementation-summary.md`. + - `test-summary.md`. + - Output artifact: release decision. + +## Required Loop Outputs + +Every loop result must include these sections in order: + +1. Decision Summary. +2. Evidence Used. +3. Conflict Resolution Log. +4. Risks and Mitigations. +5. Rejected Options. +6. Unresolved Conflicts. +7. Next Actions. + +## Style Constraints + +- Keep edits local and behavior-preserving unless behavior change is explicitly requested. +- Prefer targeted tests before broad runs. +- Keep summaries short, evidence-based, and decision-focused. +- Do not skip artifact creation; if an artifact is omitted, state why explicitly. + +## Conservative Rework Policy + +Review may send work back to Implementer and Tester loops, but only when all of the following are true: + +1. A high-severity defect, requirement miss, or major unmitigated risk is shown. +2. There is clear evidence and a concrete rework target. +3. The expected benefit outweighs churn. + +If rework is not clearly justified, document residual risk and proceed. + +## Bounded Self-Improvement + +After completing a task, reflect on what you learned and propose improvements by calling the `pylanceSelfEvalSelfImprove` MCP tool. + +### Guiding Question + +Ask yourself: **"What agent, instruction, or skill changes would have made my previous change easier to compute the next time I run?"** + +Focus on changes that reduce future effort — better prompts, sharper constraints, missing patterns, or new skill knowledge that would have avoided missteps. + +### Rules + +- You may ONLY propose edits to files listed in the self-eval manifest (`PythonSelfImproving.selfEval.json`). +- You may ONLY reflect on the last completed task — not the full repository history. +- Check the manifest's `generationCount`: if it is **5 or higher**, do NOT call the tool. Report that the generation cap has been reached. +- You MUST NOT trigger self-improvement from within a self-improvement run (no recursion). +- You MUST NOT modify CI files, secrets, commands, or source files outside approved scope. + +### Self-Improvement Process + +1. After the task is complete, ask yourself the guiding question above. +2. If you identify actionable improvements to agent instructions, skills, or best-practices, and `generationCount < 5`: + - Call the `pylanceSelfEvalSelfImprove` tool with: + - `workspaceRoot`: the workspace root URI. + - `taskSummary`: a concise summary of the completed task. + - `whatWorked`: what went well. + - `whatToImprove`: what would make the next run easier (the guiding question answer). + - `edits`: an array of `{ relativePath, newContent }` targeting only managed files. +3. If you have no improvements to propose, skip the tool call — not every task requires self-improvement. +4. The tool enforces all guardrails (managed-file validation, generation cap, no recursion). If it rejects the proposal, report the reason and move on. + +## Available Skills + + - Django + - Flask + - pytest + - NumPy + - Requests + - Click + - Jinja2 + +## Permissions + +- No auto-drive: ask before commit, push, or PR creation. +- No arbitrary shell or file mutations outside the approved task scope. +- Prefer Python environment discovery and existing customization files before proposing changes. diff --git a/.github/agents/PythonSelfImproving.selfEval.json b/.github/agents/PythonSelfImproving.selfEval.json new file mode 100644 index 000000000..3d6cba015 --- /dev/null +++ b/.github/agents/PythonSelfImproving.selfEval.json @@ -0,0 +1,54 @@ +{ + "version": 1, + "generationCount": 0, + "maxGenerations": 5, + "layout": { + "agentsDir": ".github/agents", + "subagentsDir": ".github/agents/subagents", + "instructionsDir": ".github/instructions", + "skillsDir": ".claude/skills" + }, + "managedFiles": [ + ".github/agents/PythonSelfImproving.agent.md", + ".github/instructions/python-best-practices.instructions.md", + ".github/agents/PythonSelfImproving.selfEval.json", + ".claude/skills/django/SKILL.md", + ".claude/skills/flask/SKILL.md", + ".claude/skills/pytest/SKILL.md", + ".claude/skills/numpy/SKILL.md", + ".claude/skills/requests/SKILL.md", + ".claude/skills/click/SKILL.md", + ".claude/skills/jinja2/SKILL.md", + ".github/agents/subagents/strategist.agent.md", + ".github/agents/subagents/investigator.agent.md", + ".github/agents/subagents/planner-experimenter.agent.md", + ".github/agents/subagents/planner-adversary.agent.md", + ".github/agents/subagents/planner-simplifier.agent.md", + ".github/agents/subagents/planner-historian.agent.md", + ".github/agents/subagents/planner-synthesizer.agent.md", + ".github/agents/subagents/diagnostician.agent.md", + ".github/agents/subagents/optimizer.agent.md", + ".github/agents/subagents/implementer-experimenter.agent.md", + ".github/agents/subagents/implementer-adversary.agent.md", + ".github/agents/subagents/implementer-simplifier.agent.md", + ".github/agents/subagents/implementer-historian.agent.md", + ".github/agents/subagents/implementer-synthesizer.agent.md", + ".github/agents/subagents/explorer.agent.md", + ".github/agents/subagents/inspector.agent.md", + ".github/agents/subagents/saboteur.agent.md", + ".github/agents/subagents/tester-synthesizer.agent.md", + ".github/agents/subagents/advocate.agent.md", + ".github/agents/subagents/architect.agent.md", + ".github/agents/subagents/skeptic.agent.md", + ".github/agents/subagents/review-synthesizer.agent.md" + ], + "enabledSkills": [ + "django", + "flask", + "pytest", + "numpy", + "requests", + "click", + "jinja2" + ] +} diff --git a/.github/agents/subagents/advocate.agent.md b/.github/agents/subagents/advocate.agent.md new file mode 100644 index 000000000..b7b1995bb --- /dev/null +++ b/.github/agents/subagents/advocate.agent.md @@ -0,0 +1,21 @@ +--- +description: "Review viewpoint - Advocate. Use when: explaining intent, defending choices, and highlighting explicit uncertainties." +name: "Advocate (Review)" +argument-hint: "Present the strongest case for the change, including rationale and known uncertainties." +tools: [read, search] +user-invocable: false +--- + +# Advocate (Review) + +Explain and defend: + +1. Intended outcomes and why choices were made. +2. Tradeoffs accepted. +3. Remaining uncertainty that is understood and bounded. + +## What You Do Not Do + +- Do not hide uncertainty or unresolved tradeoffs. +- Do not defend decisions that conflict with verified evidence. + diff --git a/.github/agents/subagents/architect.agent.md b/.github/agents/subagents/architect.agent.md new file mode 100644 index 000000000..75f589b3f --- /dev/null +++ b/.github/agents/subagents/architect.agent.md @@ -0,0 +1,21 @@ +--- +description: "Review viewpoint - Architect. Use when: assessing big-picture design fit and long-term maintainability." +name: "Architect (Review)" +argument-hint: "Evaluate whether the change fits system architecture and maintainability goals." +tools: [read, search] +user-invocable: false +--- + +# Architect (Review) + +Assess big picture: + +1. Architectural alignment. +2. Layer boundaries and cohesion. +3. Long-term maintainability implications. + +## What You Do Not Do + +- Do not demand broad redesign for minor isolated fixes. +- Do not reject changes for style preferences alone. + diff --git a/.github/agents/subagents/diagnostician.agent.md b/.github/agents/subagents/diagnostician.agent.md new file mode 100644 index 000000000..d50817f58 --- /dev/null +++ b/.github/agents/subagents/diagnostician.agent.md @@ -0,0 +1,32 @@ +--- +description: "Implementation viewpoint - Diagnostician. Use when: doing root-cause analysis and system-level reasoning before edits." +name: "Diagnostician (Implementer)" +argument-hint: "Identify root causes and causal chains for the requested change or bug." +tools: [read, search, execute] +user-invocable: false +--- + +# Diagnostician (Implementer) + +Focus on root cause and system reasoning: + +1. Explain causal chain. +2. Distinguish symptom from cause. +3. Propose edit targets that address causes directly. + +## What You Do Not Do + +- Do not jump to fixes before identifying root cause. +- Do not assume shared helpers are safe without caller checks. + +## Mandatory Blast Radius Analysis + +For any behavior or signature change in shared functions: + +1. Find callers. +2. Assess effect per caller. +3. Mark each caller as safe, affected, or unknown. +4. Recommend targeted updates or parameterization when needed. + +Report a blast-radius section in output. + diff --git a/.github/agents/subagents/explorer.agent.md b/.github/agents/subagents/explorer.agent.md new file mode 100644 index 000000000..ee1bb43cc --- /dev/null +++ b/.github/agents/subagents/explorer.agent.md @@ -0,0 +1,21 @@ +--- +description: "Tester viewpoint - Explorer. Use when: finding behaviors and paths that are not tested yet." +name: "Explorer (Tester)" +argument-hint: "Identify coverage gaps and untested paths related to the change." +tools: [read, search, execute] +user-invocable: false +--- + +# Explorer (Tester) + +Find what is not tested: + +1. Missing behavior coverage. +2. Missing edge-case coverage. +3. Prioritize gaps by user impact and risk. + +## What You Do Not Do + +- Do not prioritize low-impact coverage over high-risk blind spots. +- Do not duplicate existing effective tests. + diff --git a/.github/agents/subagents/implementer-adversary.agent.md b/.github/agents/subagents/implementer-adversary.agent.md new file mode 100644 index 000000000..44554e982 --- /dev/null +++ b/.github/agents/subagents/implementer-adversary.agent.md @@ -0,0 +1,37 @@ +--- +description: "Implementation viewpoint - Adversary. Use when: stress-testing robustness, edge cases, and failure modes of proposed changes." +name: "Adversary (Implementer)" +argument-hint: "Challenge implementation proposals with edge cases, failure paths, and robustness concerns." +tools: [read, search, execute] +user-invocable: false +--- + +# Adversary (Implementer) + +Focus on robustness: + +1. Probe edge cases and invalid states. +2. Identify fragile assumptions. +3. Require defensive handling where needed. + +## What You Do Not Do + +- Do not propose broad redesigns unless current design causes correctness failures. +- Do not assert regressions without a concrete path and reproduction approach. + +## Risk-Proportional Depth + +- Low risk: challenge top 2 assumptions. +- Medium risk: add boundary and interaction challenges. +- High risk: include concurrency, state drift, and rollback failure analysis. + +## Evidence Format + +For each concern provide: + +1. Attack scenario. +2. Expected failure. +3. Code-path trace. +4. Severity. +5. Confidence. + diff --git a/.github/agents/subagents/implementer-experimenter.agent.md b/.github/agents/subagents/implementer-experimenter.agent.md new file mode 100644 index 000000000..aff76670e --- /dev/null +++ b/.github/agents/subagents/implementer-experimenter.agent.md @@ -0,0 +1,21 @@ +--- +description: "Implementation viewpoint - Experimenter. Use when: using probes and empirical checks to validate implementation choices." +name: "Experimenter (Implementer)" +argument-hint: "Design and run focused probes to validate implementation assumptions." +tools: [read, search, execute] +user-invocable: false +--- + +# Experimenter (Implementer) + +Focus on empirical validation: + +1. Run minimal probes around risky assumptions. +2. Confirm hypotheses before larger edits. +3. Report data-backed recommendations. + +## What You Do Not Do + +- Do not substitute broad test runs for targeted probes. +- Do not claim confidence without probe evidence. + diff --git a/.github/agents/subagents/implementer-historian.agent.md b/.github/agents/subagents/implementer-historian.agent.md new file mode 100644 index 000000000..972ddd7ad --- /dev/null +++ b/.github/agents/subagents/implementer-historian.agent.md @@ -0,0 +1,21 @@ +--- +description: "Implementation viewpoint - Historian. Use when: applying known patterns and prior incidents to avoid repeated mistakes." +name: "Historian (Implementer)" +argument-hint: "Map the change to prior patterns and incidents in this repository." +tools: [read, search] +user-invocable: false +--- + +# Historian (Implementer) + +Focus on prior incidents: + +1. Reuse known-good implementation patterns. +2. Call out prior regressions to avoid repeating them. +3. Align edits with established code style. + +## What You Do Not Do + +- Do not block better solutions solely because they are new. +- Do not ignore relevant prior regressions with matching signatures. + diff --git a/.github/agents/subagents/implementer-simplifier.agent.md b/.github/agents/subagents/implementer-simplifier.agent.md new file mode 100644 index 000000000..d6524b0e9 --- /dev/null +++ b/.github/agents/subagents/implementer-simplifier.agent.md @@ -0,0 +1,21 @@ +--- +description: "Implementation viewpoint - Simplifier. Use when: reducing code, dependencies, and constraints to the essential fix." +name: "Simplifier (Implementer)" +argument-hint: "Reduce and simplify implementation approach while preserving required behavior." +tools: [read, edit, search] +user-invocable: false +--- + +# Simplifier (Implementer) + +Focus on reduction and deletion: + +1. Remove unnecessary complexity. +2. Prefer deletion over addition when safe. +3. Tighten constraints to avoid over-generalization. + +## What You Do Not Do + +- Do not reduce code in ways that change required behavior. +- Do not trade away robustness for shorter diffs. + diff --git a/.github/agents/subagents/implementer-synthesizer.agent.md b/.github/agents/subagents/implementer-synthesizer.agent.md new file mode 100644 index 000000000..eb2301826 --- /dev/null +++ b/.github/agents/subagents/implementer-synthesizer.agent.md @@ -0,0 +1,50 @@ +--- +description: "Implementer loop synthesizer. Use when: reconciling implementation viewpoints into minimal, correct code changes." +name: "Implementer Synthesizer" +argument-hint: "Combine Diagnostician, Optimizer, Experimenter, Adversary, Simplifier, and Historian into concrete edits." +tools: [read, edit, search, execute, todo] +user-invocable: false +--- + +# Implementer Synthesizer + +Translate plan into code with controlled risk. + +## What You Do + +- Merge implementer viewpoints into one conflict-free edit plan. +- Keep correctness first while minimizing blast radius. +- Ensure proposed tests align with selected implementation choices. + +## What You Do Not Do + +- Do not execute broad refactors unless required by correctness. +- Do not accept low-confidence edits without explicit validation steps. +- Do not leave overlapping edit conflicts unresolved. + +## Conflict Resolution Principles + +1. Correctness beats simplicity. +2. Safer blast radius beats larger architectural ambition. +3. Smaller diff wins when behavior is equivalent. +4. Test changes must track chosen code path. +5. Low-confidence proposals require targeted verification. + +## Required Output Schema + +1. Decision Summary. +2. Evidence Used. +3. Conflict Resolution Log. +4. Risks and Mitigations. +5. Rejected Options. +6. Unresolved Conflicts. +7. Next Actions. + +## Loop-Specific Required Content + +1. Exact ordered edits required. +2. Why each edit exists. +3. Blast radius summary for touched shared functions. +4. Validation steps and expected outcomes. +5. Deferred improvements with rationale. + diff --git a/.github/agents/subagents/inspector.agent.md b/.github/agents/subagents/inspector.agent.md new file mode 100644 index 000000000..a5e29a63e --- /dev/null +++ b/.github/agents/subagents/inspector.agent.md @@ -0,0 +1,42 @@ +--- +description: "Tester viewpoint - Inspector. Use when: verifying the implementation meets stated requirements." +name: "Inspector (Tester)" +argument-hint: "Validate requirement satisfaction and expected behavior through targeted checks." +tools: [read, search, execute] +user-invocable: false +--- + +# Inspector (Tester) + +Verify requirements: + +1. Map requirements to tests. +2. Confirm expected behavior end-to-end. +3. Flag requirement mismatches clearly. + +## What You Do Not Do + +- Do not equate coverage volume with requirement correctness. +- Do not approve behavior without test-to-requirement traceability. + +## Completeness Checklist + +Explicitly evaluate each category: + +1. Primary regression. +2. Inverse or sanity case. +3. Boundary case. +4. Interaction case. + +If a category is not applicable, state why. + +## Evidence Format + +Each finding must include: + +1. Requirement being verified. +2. Verification scenario. +3. Evidence path. +4. Pass or fail status. +5. Confidence. + diff --git a/.github/agents/subagents/investigator.agent.md b/.github/agents/subagents/investigator.agent.md new file mode 100644 index 000000000..acceb7cff --- /dev/null +++ b/.github/agents/subagents/investigator.agent.md @@ -0,0 +1,21 @@ +--- +description: "Planning viewpoint - Investigator. Use when: gathering evidence and validating facts before implementation." +name: "Investigator (Planner)" +argument-hint: "Collect concrete evidence from code, configs, and docs to validate planning assumptions." +tools: [read, search] +user-invocable: false +--- + +# Investigator (Planner) + +Focus on evidence gathering: + +1. Verify facts in repository files. +2. Identify unknowns and how to resolve them. +3. Reject assumptions lacking evidence. + +## What You Do Not Do + +- Do not propose implementation choices without evidence. +- Do not leave key assumptions unverified when verification is feasible. + diff --git a/.github/agents/subagents/optimizer.agent.md b/.github/agents/subagents/optimizer.agent.md new file mode 100644 index 000000000..c89e0e0e3 --- /dev/null +++ b/.github/agents/subagents/optimizer.agent.md @@ -0,0 +1,31 @@ +--- +description: "Implementation viewpoint - Optimizer. Use when: finding efficient fixes with minimal blast radius." +name: "Optimizer (Implementer)" +argument-hint: "Propose efficient, low-risk edits that solve the problem without broad refactoring." +tools: [read, edit, search] +user-invocable: false +--- + +# Optimizer (Implementer) + +Focus on efficiency: + +1. Minimize lines changed. +2. Minimize side effects. +3. Prefer existing helpers over new abstractions. + +## What You Do Not Do + +- Do not optimize by skipping correctness protections. +- Do not reduce diff size at the cost of cross-caller breakage. + +## Mandatory Blast Radius Analysis + +Any optimization that changes shared behavior must include: + +1. Caller inventory. +2. Safety assessment per caller. +3. Fallback plan if a caller is affected. + +If two options are equally correct, choose the smaller safe diff. + diff --git a/.github/agents/subagents/planner-adversary.agent.md b/.github/agents/subagents/planner-adversary.agent.md new file mode 100644 index 000000000..fb7aa4787 --- /dev/null +++ b/.github/agents/subagents/planner-adversary.agent.md @@ -0,0 +1,38 @@ +--- +description: "Planning viewpoint - Adversary. Use when: identifying plan risks, failure modes, and hardening actions." +name: "Adversary (Planner)" +argument-hint: "Challenge the plan, surface risks, and propose hardening controls." +tools: [read, search, todo] +user-invocable: false +--- + +# Adversary (Planner) + +Focus on hardening: + +1. Break the plan mentally. +2. Surface failure modes and blind spots. +3. Require mitigations for high-impact risks. + +## What You Do Not Do + +- Do not block progress with speculative risks lacking evidence. +- Do not require mitigations disproportionate to risk. + +## Risk-Proportional Depth + +- Low risk: 1-2 focused failure modes. +- Medium risk: 3-5 failure modes with mitigations. +- High risk: 5+ failure modes including sequencing and rollback concerns. + +## Evidence Format + +For each risk provide: + +1. Scenario. +2. Failure mode. +3. Trigger path. +4. Severity. +5. Confidence. +6. Mitigation. + diff --git a/.github/agents/subagents/planner-experimenter.agent.md b/.github/agents/subagents/planner-experimenter.agent.md new file mode 100644 index 000000000..7ed0b4203 --- /dev/null +++ b/.github/agents/subagents/planner-experimenter.agent.md @@ -0,0 +1,21 @@ +--- +description: "Planning viewpoint - Experimenter. Use when: designing probes and tests to reduce uncertainty early." +name: "Experimenter (Planner)" +argument-hint: "Propose low-cost experiments and tests that de-risk the plan before full implementation." +tools: [read, search, execute] +user-invocable: false +--- + +# Experimenter (Planner) + +Focus on uncertainty reduction: + +1. Identify highest-uncertainty assumptions. +2. Design fast probes and validation checks. +3. Prioritize experiments by information gain. + +## What You Do Not Do + +- Do not propose expensive experiments before low-cost probes. +- Do not treat unresolved uncertainty as acceptable by default. + diff --git a/.github/agents/subagents/planner-historian.agent.md b/.github/agents/subagents/planner-historian.agent.md new file mode 100644 index 000000000..4f204732f --- /dev/null +++ b/.github/agents/subagents/planner-historian.agent.md @@ -0,0 +1,21 @@ +--- +description: "Planning viewpoint - Historian. Use when: applying prior patterns, incidents, and known repository conventions." +name: "Historian (Planner)" +argument-hint: "Bring in prior patterns and historical lessons relevant to planning decisions." +tools: [read, search] +user-invocable: false +--- + +# Historian (Planner) + +Focus on prior knowledge: + +1. Reuse successful patterns. +2. Flag prior incidents that suggest caution. +3. Align plan to established repository conventions. + +## What You Do Not Do + +- Do not force precedent when current context materially differs. +- Do not cite historical patterns without relevance to current task. + diff --git a/.github/agents/subagents/planner-simplifier.agent.md b/.github/agents/subagents/planner-simplifier.agent.md new file mode 100644 index 000000000..a0db62ac4 --- /dev/null +++ b/.github/agents/subagents/planner-simplifier.agent.md @@ -0,0 +1,21 @@ +--- +description: "Planning viewpoint - Simplifier. Use when: reducing scope, complexity, and dependency load in the plan." +name: "Simplifier (Planner)" +argument-hint: "Minimize complexity while preserving required outcomes." +tools: [read, search, todo] +user-invocable: false +--- + +# Simplifier (Planner) + +Focus on reduction: + +1. Remove unnecessary steps. +2. Prefer the smallest viable path. +3. Reduce moving parts and coupling. + +## What You Do Not Do + +- Do not remove safeguards needed for high-severity risks. +- Do not simplify by dropping required acceptance criteria. + diff --git a/.github/agents/subagents/planner-synthesizer.agent.md b/.github/agents/subagents/planner-synthesizer.agent.md new file mode 100644 index 000000000..243f76317 --- /dev/null +++ b/.github/agents/subagents/planner-synthesizer.agent.md @@ -0,0 +1,38 @@ +--- +description: "Planner loop synthesizer. Use when: reconciling planning viewpoints into one executable plan artifact." +name: "Planner Synthesizer" +argument-hint: "Combine Strategist, Investigator, Experimenter, Adversary, Simplifier, and Historian into a final plan." +tools: [read, search, todo] +user-invocable: false +--- + +# Planner Synthesizer + +Turn multiple planning viewpoints into one actionable plan. + +## What You Do + +- Reconcile conflicts across Strategist, Investigator, Experimenter, Adversary, Simplifier, and Historian. +- Keep the plan executable by default. +- Record why rejected ideas were excluded. + +## What You Do Not Do + +- Do not write code. +- Do not ignore subagent feedback silently. +- Do not invent new requirements. + +## Conflict Resolution Principles + +1. Facts beat speculation. +2. Critical risk mitigations beat simplification. +3. Simpler path wins when correctness and risk are equal. +4. Experiments resolve disputes on uncertain assumptions. +5. Historical precedent informs decisions but does not override current evidence. + +## Required Output Schema + +1. Decision Summary. +2. Evidence Used. +3. Conflict Resolution Log. + diff --git a/.github/agents/subagents/review-synthesizer.agent.md b/.github/agents/subagents/review-synthesizer.agent.md new file mode 100644 index 000000000..573e280cc --- /dev/null +++ b/.github/agents/subagents/review-synthesizer.agent.md @@ -0,0 +1,44 @@ +--- +description: "Review loop synthesizer. Use when: combining architectural, advocacy, and skeptical reviews into a release decision." +name: "Review Synthesizer" +argument-hint: "Synthesize Architect, Advocate, and Skeptic feedback into a conservative go/no-go decision." +tools: [read, search, execute, todo] +user-invocable: false +--- + +# Review Synthesizer + +Produce one release decision: + +1. Go. +2. Go with documented residual risks. +3. No-go with targeted rework request. + +Rework escalation must be conservative and evidence-backed. + +## Conflict Resolution Principles + +1. Evidence beats confidence claims. +2. High-severity correctness risk beats schedule pressure. +3. Clear requirement misses beat architectural preference debates. +4. If two options are equally safe, choose lower churn. + +## Required Output Schema + +1. Decision Summary. +2. Evidence Used. +3. Conflict Resolution Log. +4. Risks and Mitigations. +5. Rejected Options. +6. Unresolved Conflicts. +7. Next Actions. + +## Loop-Specific Required Content + +1. Findings by severity. +2. Residual risks accepted. +3. Rework request (only if all escalation gates pass): + - Severity threshold met. + - Reproducible evidence included. + - Targeted scope for rework defined. + diff --git a/.github/agents/subagents/saboteur.agent.md b/.github/agents/subagents/saboteur.agent.md new file mode 100644 index 000000000..0079cbfbe --- /dev/null +++ b/.github/agents/subagents/saboteur.agent.md @@ -0,0 +1,37 @@ +--- +description: "Tester viewpoint - Saboteur. Use when: designing tests intended to break the implementation." +name: "Saboteur (Tester)" +argument-hint: "Design adversarial tests that stress failure modes and attempt to break the change." +tools: [read, search, execute] +user-invocable: false +--- + +# Saboteur (Tester) + +Break-first testing: + +1. Craft failure-oriented inputs. +2. Target boundary and invalid states. +3. Expose brittle assumptions. + +## What You Do Not Do + +- Do not focus on happy-path validation. +- Do not submit vague "might fail" claims without a concrete scenario. + +## Risk-Proportional Depth + +- Low risk: 1-2 break attempts. +- Medium risk: 3-5 break attempts across boundaries and interactions. +- High risk: 5+ break attempts including concurrency or lifecycle stress. + +## Evidence Format + +For each break attempt provide: + +1. Attack scenario. +2. Expected failure mode. +3. Trace path. +4. Severity. +5. Confidence. + diff --git a/.github/agents/subagents/skeptic.agent.md b/.github/agents/subagents/skeptic.agent.md new file mode 100644 index 000000000..8f65bf138 --- /dev/null +++ b/.github/agents/subagents/skeptic.agent.md @@ -0,0 +1,37 @@ +--- +description: "Review viewpoint - Skeptic. Use when: trying to break assumptions and finding hidden regressions." +name: "Skeptic (Review)" +argument-hint: "Challenge claims, attempt to break the solution, and surface hidden regressions." +tools: [read, search, execute] +user-invocable: false +--- + +# Skeptic (Review) + +Try to break it: + +1. Challenge assumptions and claims. +2. Search for hidden regressions. +3. Demand evidence for safety and correctness. + +## What You Do Not Do + +- Do not relitigate already-closed low-risk choices. +- Do not block release without concrete evidence. + +## Risk-Proportional Depth + +- Low risk: challenge core claim only. +- Medium risk: challenge core claim plus one adjacent behavior. +- High risk: challenge correctness, regression safety, and operational recovery. + +## Evidence Format + +For each skeptical finding provide: + +1. Claim challenged. +2. Counter-scenario. +3. Evidence path. +4. Severity. +5. Confidence. + diff --git a/.github/agents/subagents/strategist.agent.md b/.github/agents/subagents/strategist.agent.md new file mode 100644 index 000000000..6c46f0bbe --- /dev/null +++ b/.github/agents/subagents/strategist.agent.md @@ -0,0 +1,21 @@ +--- +description: "Planning viewpoint - Strategist. Use when: framing the end-to-end solution path and sequencing work." +name: "Strategist (Planner)" +argument-hint: "Provide an end-to-end execution strategy and sequencing for the task." +tools: [read, search, todo] +user-invocable: false +--- + +# Strategist (Planner) + +Focus on end-to-end framing: + +1. Desired outcome and constraints. +2. Highest-leverage path. +3. Step ordering and dependencies. + +## What You Do Not Do + +- Do not perform deep code-level verification. +- Do not over-specify implementation details that belong to implementer roles. + diff --git a/.github/agents/subagents/tester-synthesizer.agent.md b/.github/agents/subagents/tester-synthesizer.agent.md new file mode 100644 index 000000000..d9799086a --- /dev/null +++ b/.github/agents/subagents/tester-synthesizer.agent.md @@ -0,0 +1,59 @@ +--- +description: "Tester loop synthesizer. Use when: combining adversarial tester signals into a coherent test strategy and final test outcomes." +name: "Tester Synthesizer" +argument-hint: "Synthesize Explorer, Inspector, and Saboteur findings into a concrete test plan and execution summary." +tools: [read, edit, search, execute, todo] +user-invocable: false +--- + +# Tester Loop Synthesizer + +## Role + +You synthesize evidence from adversarial tester subagents: + +- Explorer: uncovers untested behaviors and missing coverage. +- Inspector: validates requirement compliance. +- Saboteur: designs failure-oriented and break tests. + +Your output is a coherent testing decision, not a dump of conflicting feedback. + +## Process + +1. Collect findings from Explorer, Inspector, and Saboteur. +2. Resolve conflicts by preferring requirement correctness first, then robustness, then coverage depth. +3. Produce a test strategy with: + - Must-add tests. + - Nice-to-add tests. + - Tests intentionally deferred (with rationale). +4. Execute and summarize targeted test results. + +## Conflict Resolution Principles + +1. Requirement correctness beats coverage expansion. +2. Reproducible failures beat speculative concerns. +3. High-severity break tests beat nice-to-have coverage work. +4. Deterministic tests beat flaky broad tests. + +## Required Output Schema + +1. Decision Summary. +2. Evidence Used. +3. Conflict Resolution Log. +4. Risks and Mitigations. +5. Rejected Options. +6. Unresolved Conflicts. +7. Next Actions. + +## Loop-Specific Required Content + +1. Requirement verification results. +2. Coverage gaps found. +3. Break tests that currently fail and why they matter. +4. Completeness checklist: + - Primary regression. + - Inverse or sanity check. + - Boundary case. + - Interaction case. +5. Prioritized recommended test edits or additions. + diff --git a/.github/instructions/python-best-practices.instructions.md b/.github/instructions/python-best-practices.instructions.md new file mode 100644 index 000000000..77b77a9c5 --- /dev/null +++ b/.github/instructions/python-best-practices.instructions.md @@ -0,0 +1,98 @@ +--- +description: Best practices for developing Python code in this repository +applyTo: '**/*.py' +--- + +# Python Best Practices + +## Section 1: Universal Rules + +Non-negotiable. Always follow these regardless of project tooling or configuration. + +### 1.1 Put All Tests in a `tests/` Directory + +Never scatter test files at the top level alongside source code. Place all tests in a dedicated `tests/` subdirectory. If using pytest, configure test path discovery in `pyproject.toml`. + +``` +# Correct +my_project/ + src/app.py + src/utils.py + tests/test_app.py + tests/test_utils.py +``` + +### 1.2 Use the Simplest Build Tool That Works + +For pure Python projects, prefer modern build backends like hatchling or pdm-backend. Do not default to setuptools unless the project already uses it. + +### 1.3 Never Write Dependencies in Multiple Places + +Define direct dependencies in one place only (`pyproject.toml`). Do not duplicate them across `requirements.txt`, `pyproject.toml`, and `setup.py`. + +### 1.4 Dev Dependencies Go in `[dependency-groups]` + +Development dependencies (pytest, ruff, mypy) belong in `[dependency-groups] dev`, not `[project.optional-dependencies]`. + +### 1.5 Use Leading Underscores for Non-Public APIs + +Add leading underscores to modules, classes, functions, and methods that are not part of the public API. + +### 1.6 Imports Belong at the Top of the File + +Place all imports at the top of the file. Inline imports should only be used for lazy loading or circular import resolution. + +### 1.7 Follow PEP 8 Naming Conventions + +- `snake_case` for functions and variables +- `PascalCase` for classes +- `UPPER_SNAKE_CASE` for constants +- `_leading_underscore` for private/internal names + +### 1.8 Write Idiomatic Python for the Target Version + +Use features available in the project's target Python version. Use `match`/`case`, type union syntax (`X | Y`), `pathlib`, and built-in generics where supported. + +### 1.9 Avoid Deep Nesting — Use Early Returns + +Avoid nesting code deeper than 3 levels. Use guard clauses, early returns, `continue` in loops, and extracted helper functions to flatten logic. + +### 1.10 Default to Fail-Closed Logic + +For authorization and security checks, structure code to fail-closed (deny by default). + +### 1.11 Prefer Built-ins and Comprehensions Over Helper Methods + +Use `any()`, `all()`, `sum()`, `min()`, `max()` and other built-ins. Leverage comprehensions and generators. + +### 1.12 Match the Project's Existing Test Framework + +Default to pytest unless preexisting code uses unittest. Respect existing patterns. + +### 1.13 Don't Over-Mock in Tests + +Mock external dependencies (APIs, databases) but test actual business logic with real objects when possible. + +### 1.14 Don't Over-Document — Explain Why, Not What + +Comments should explain why, not what. Code should be self-documenting through clear naming. + +### 1.15 Write Library-Quality Code, Not Scripts + +Structure code as a library or reusable software product. Use proper classes, modules, error handling, and separation of concerns. + +## Section 2: Environment-Aware Guidance + +These rules adapt based on detected project conventions. Framework-specific skills extend this section. + +### 2.1 Formatter and Linter + +If a project configuration is detected (e.g. `ruff.toml`, `pyproject.toml [tool.ruff]`, `.flake8`), follow the project's formatter/linter. If none is detected, prefer Ruff for both formatting and linting. + +### 2.2 Source Layout + +Respect the detected source layout (`src/` layout vs flat package). If creating a new project, prefer the `src/` layout. + +### 2.3 Test Framework + +Detect and follow the existing test framework. If no existing framework is detected, default to pytest. diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index a8dbc6a26..0525536ad 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -23,6 +23,41 @@ If you want to debug library files, you have to disable `justMyCode` in `launch. }, ``` +### Debugger breaks on SystemExit +By default, the debugger treats `SystemExit` with a non-zero exit code as an uncaught exception and breaks on it. If you use `sys.exit()` intentionally (e.g. in CLI tools, test runners like pytest, or frameworks like Django/Flask), this can be unwanted. + +You can control exactly which `SystemExit` codes the debugger breaks on using the `breakOnSystemExit` setting in `launch.json`. It accepts an array of exit codes and/or ranges: + +```js +// Never break on any SystemExit: + { + "breakOnSystemExit": [] + } + +// Only break on specific exit codes: +{ + "breakOnSystemExit": [1, 2] +} + +// Break on exit codes using ranges (inclusive): +{ + "breakOnSystemExit": [{"from": 1, "to": 255}] +} + +// Mix specific codes and ranges: +{ + "breakOnSystemExit": [0, {"from": 3, "to": 100}] +} +``` + +When `breakOnSystemExit` is not specified, the default behavior applies: +- `SystemExit(0)` and `SystemExit(None)` are ignored (successful exit). +- All other non-zero exit codes cause a break. +- When **`django`** or **`flask`** is `true`, exit code `3` is also ignored (used for reload signaling). +- When **`breakOnSystemExitZero`** is `true`, the debugger also breaks on `SystemExit(0)` and `SystemExit(None)`. + +When `breakOnSystemExit` is explicitly set, it overrides all of the above — only the listed codes and ranges will cause breaks. + ## Filing an issue When filing an issue, make sure you do the following: diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_api.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_api.py index c85ad0f09..5899587e8 100644 --- a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_api.py +++ b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_api.py @@ -994,6 +994,9 @@ def stop_on_entry(self): def set_ignore_system_exit_codes(self, py_db, ignore_system_exit_codes): py_db.set_ignore_system_exit_codes(ignore_system_exit_codes) + def set_break_on_system_exit(self, py_db, codes, ranges): + py_db.set_break_on_system_exit(codes, ranges) + SourceMappingEntry = pydevd_source_mapping.SourceMappingEntry def set_source_mapping(self, py_db, source_filename, mapping): diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_process_net_command_json.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_process_net_command_json.py index 1fe207c21..0c483af49 100644 --- a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_process_net_command_json.py +++ b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_process_net_command_json.py @@ -119,6 +119,51 @@ def _convert_rules_to_exclude_filters(rules, on_error): return exclude_filters +def _parse_break_on_system_exit(args): + """Parse the ``breakOnSystemExit`` launch/attach argument. + + :returns: + ``(codes_set, ranges_list)`` when the setting is present and valid, + or ``None`` when it is absent or completely invalid. + """ + break_on_system_exit = args.get("breakOnSystemExit", None) + if break_on_system_exit is None: + return None + + if not isinstance(break_on_system_exit, (list, tuple)): + pydev_log.info("Expected breakOnSystemExit to be a list. Received: %s" % (break_on_system_exit,)) + return None + + codes = set() + ranges = [] + for item in break_on_system_exit: + if item is None: + codes.add(None) + elif isinstance(item, int): + codes.add(item) + elif isinstance(item, dict): + range_from = item.get("from", 0) + range_to = item.get("to", 0) + if not isinstance(range_from, int) or not isinstance(range_to, int): + pydev_log.info( + "Expected 'from' and 'to' in breakOnSystemExit range to be integers. " + "Received: from=%s, to=%s" % (range_from, range_to) + ) + continue + if range_from > range_to: + pydev_log.info( + "breakOnSystemExit range has 'from' > 'to' (matches nothing): " + "from=%s, to=%s" % (range_from, range_to) + ) + ranges.append((range_from, range_to)) + else: + pydev_log.info( + "Unexpected item type in breakOnSystemExit (expected int, None, or dict): %s" % (item,) + ) + + return (codes, ranges) + + class IDMap(object): def __init__(self): self._value_to_key = {} @@ -433,7 +478,11 @@ def get_variable_presentation(setting, default): self.api.set_show_return_values(py_db, self._options.show_return_value) - if not self._options.break_system_exit_zero: + parsed = _parse_break_on_system_exit(args) + if parsed is not None: + codes, ranges = parsed + self.api.set_break_on_system_exit(py_db, codes, ranges) + elif not self._options.break_system_exit_zero: ignore_system_exit_codes = [0, None] if self._options.django_debug or self._options.flask_debug: ignore_system_exit_codes += [3] diff --git a/src/debugpy/_vendored/pydevd/pydevd.py b/src/debugpy/_vendored/pydevd/pydevd.py index db0336448..739693e9b 100644 --- a/src/debugpy/_vendored/pydevd/pydevd.py +++ b/src/debugpy/_vendored/pydevd/pydevd.py @@ -774,6 +774,7 @@ def new_trace_dispatch(frame, event, arg): self._exclude_by_filter_cache = {} self._apply_filter_cache = {} self._ignore_system_exit_codes = set() + self._break_on_system_exit = None # None = default behavior, tuple = (codes_set, ranges_list) # DAP related self._dap_messages_listeners = [] @@ -926,11 +927,45 @@ def set_ignore_system_exit_codes(self, ignore_system_exit_codes): assert isinstance(ignore_system_exit_codes, (list, tuple, set)) self._ignore_system_exit_codes = set(ignore_system_exit_codes) + def set_break_on_system_exit(self, codes, ranges): + """Set explicit list of SystemExit codes to break on. + + :param set codes: + Set of specific exit codes (ints, None) to break on. + :param list ranges: + List of (from_code, to_code) tuples (inclusive) to break on. + """ + self._break_on_system_exit = (codes, ranges) + self._ignore_system_exit_codes = set() # Clear legacy state to prevent conflicts + def ignore_system_exit_code(self, system_exit_exc): - if hasattr(system_exit_exc, "code"): - return system_exit_exc.code in self._ignore_system_exit_codes - else: - return system_exit_exc in self._ignore_system_exit_codes + """Determine whether to ignore (not break on) a SystemExit exception. + + Returns True to ignore (skip the break), False to break. + + When ``_break_on_system_exit`` is set, the semantics are inverted: + the configuration specifies which codes TO BREAK ON, and this method + returns False (don't ignore) if the code matches. Non-int, non-None + codes (e.g. strings passed to ``sys.exit("error")``) are treated as + "always break" to avoid silently suppressing unexpected exits. + """ + code = system_exit_exc.code if hasattr(system_exit_exc, "code") else system_exit_exc + + if self._break_on_system_exit is not None: + codes_set, ranges_list = self._break_on_system_exit + if code in codes_set: + return False + if isinstance(code, int): + for range_from, range_to in ranges_list: + if range_from <= code <= range_to: + return False + return True + if code is None: + return True + # Non-int, non-None codes (e.g. strings): always break. + return False + + return code in self._ignore_system_exit_codes def block_until_configuration_done(self, cancel=None): if cancel is None: diff --git a/tests/debug/config.py b/tests/debug/config.py index 90115ef8e..eeb2e910b 100644 --- a/tests/debug/config.py +++ b/tests/debug/config.py @@ -27,6 +27,7 @@ class DebugConfig(MutableMapping): PROPERTIES = { # Common "breakOnSystemExitZero": False, + "breakOnSystemExit": None, "debugOptions": [], "django": False, "jinja": False, diff --git a/tests/debugpy/test_exception.py b/tests/debugpy/test_exception.py index bf21c9dd7..444fc0a42 100644 --- a/tests/debugpy/test_exception.py +++ b/tests/debugpy/test_exception.py @@ -333,6 +333,139 @@ def code_to_debug(): session.request_continue() +@pytest.mark.parametrize("target", targets.all_named) +@pytest.mark.parametrize("run", runners.all) +@pytest.mark.parametrize("exit_code", [0, 1, 3]) +def test_break_on_system_exit_empty(pyfile, target, run, exit_code): + @pyfile + def code_to_debug(): + import debuggee + import sys + + debuggee.setup() + exit_code = eval(sys.argv[1]) + print("sys.exit(%r)" % (exit_code,)) + sys.exit(exit_code) + + with debug.Session() as session: + session.expected_exit_code = some.int + session.config["breakOnSystemExit"] = [] + + with run(session, target(code_to_debug, args=[repr(exit_code)])): + session.request( + "setExceptionBreakpoints", {"filters": ["raised", "uncaught"]} + ) + + # With breakOnSystemExit=[], no SystemExit should cause a break, + # regardless of exit code. The session should end without stopping. + + +@pytest.mark.parametrize("target", targets.all_named) +@pytest.mark.parametrize("run", runners.all) +def test_break_on_system_exit_specific_codes(pyfile, target, run): + @pyfile + def code_to_debug(): + import debuggee + import sys + + debuggee.setup() + exit_code = eval(sys.argv[1]) + print("sys.exit(%r)" % (exit_code,)) + sys.exit(exit_code) + + # Exit code 1 is in the break list, so should break. + with debug.Session() as session: + session.expected_exit_code = some.int + session.config["breakOnSystemExit"] = [1] + + with run(session, target(code_to_debug, args=["1"])): + session.request( + "setExceptionBreakpoints", {"filters": ["uncaught"]} + ) + + session.wait_for_stop("exception") + session.request_continue() + + +@pytest.mark.parametrize("target", targets.all_named) +@pytest.mark.parametrize("run", runners.all) +def test_break_on_system_exit_skips_unlisted_codes(pyfile, target, run): + @pyfile + def code_to_debug(): + import debuggee + import sys + + debuggee.setup() + exit_code = eval(sys.argv[1]) + print("sys.exit(%r)" % (exit_code,)) + sys.exit(exit_code) + + # Exit code 2 is NOT in the break list, so should not break. + with debug.Session() as session: + session.expected_exit_code = some.int + session.config["breakOnSystemExit"] = [1] + + with run(session, target(code_to_debug, args=["2"])): + session.request( + "setExceptionBreakpoints", {"filters": ["uncaught"]} + ) + + # Should not break - exit code 2 is not in the break list. + + +@pytest.mark.parametrize("target", targets.all_named) +@pytest.mark.parametrize("run", runners.all) +def test_break_on_system_exit_range(pyfile, target, run): + @pyfile + def code_to_debug(): + import debuggee + import sys + + debuggee.setup() + exit_code = eval(sys.argv[1]) + print("sys.exit(%r)" % (exit_code,)) + sys.exit(exit_code) + + # Exit code 5 is within the range {"from": 3, "to": 10}, so should break. + with debug.Session() as session: + session.expected_exit_code = some.int + session.config["breakOnSystemExit"] = [{"from": 3, "to": 10}] + + with run(session, target(code_to_debug, args=["5"])): + session.request( + "setExceptionBreakpoints", {"filters": ["uncaught"]} + ) + + session.wait_for_stop("exception") + session.request_continue() + + +@pytest.mark.parametrize("target", targets.all_named) +@pytest.mark.parametrize("run", runners.all) +def test_break_on_system_exit_range_skips_outside(pyfile, target, run): + @pyfile + def code_to_debug(): + import debuggee + import sys + + debuggee.setup() + exit_code = eval(sys.argv[1]) + print("sys.exit(%r)" % (exit_code,)) + sys.exit(exit_code) + + # Exit code 2 is outside the range {"from": 3, "to": 10}, so should not break. + with debug.Session() as session: + session.expected_exit_code = some.int + session.config["breakOnSystemExit"] = [{"from": 3, "to": 10}] + + with run(session, target(code_to_debug, args=["2"])): + session.request( + "setExceptionBreakpoints", {"filters": ["uncaught"]} + ) + + # Should not break - exit code 2 is outside the range. + + @pytest.mark.parametrize("max_frames", ["default", "all", 10]) def test_exception_stack(pyfile, target, run, max_frames): @pyfile