diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..82bcf69 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 + +name: Bug Report +description: Report unexpected behaviour in Zenzic's checks, output, or exit codes. +title: "fix: " +labels: ["bug", "triage"] +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to report a bug. Please fill in all required fields — + this allows us to reproduce and fix the issue quickly. + + - type: input + id: version + attributes: + label: Zenzic version + description: Output of `zenzic --version` + placeholder: "0.4.0rc3" + validations: + required: true + + - type: input + id: python + attributes: + label: Python version + description: Output of `python --version` + placeholder: "3.13.0" + validations: + required: true + + - type: dropdown + id: os + attributes: + label: Operating system + options: + - Linux + - macOS + - Windows + multiple: false + validations: + required: true + + - type: dropdown + id: engine + attributes: + label: Documentation engine + description: Which adapter was in use when the bug occurred? + options: + - MkDocs (mkdocs.yml present) + - Zensical (zensical.toml present) + - Vanilla (no engine config) + - Not applicable / unsure + multiple: false + validations: + required: true + + - type: textarea + id: command + attributes: + label: Command run + description: The exact `zenzic` command that triggered the bug. + placeholder: "zenzic check all --strict" + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected behaviour + description: What did you expect to happen? + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual behaviour + description: What actually happened? Include the full terminal output if possible. 
+ render: text + validations: + required: true + + - type: textarea + id: config + attributes: + label: zenzic.toml (if any) + description: Paste your `zenzic.toml` content here (remove any secrets first). + render: toml + validations: + required: false + + - type: checkboxes + id: checklist + attributes: + label: Pre-submission checklist + options: + - label: I have searched existing issues and this is not a duplicate. + required: true + - label: I have reproduced the bug with the latest published version. + required: true diff --git a/.github/ISSUE_TEMPLATE/docs_issue.yml b/.github/ISSUE_TEMPLATE/docs_issue.yml new file mode 100644 index 0000000..2317023 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/docs_issue.yml @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 + +name: Documentation Issue +description: Report an error, gap, or unclear section in the Zenzic documentation. +title: "docs: " +labels: ["documentation", "triage"] +body: + - type: markdown + attributes: + value: | + Documentation issues are just as important as code bugs. Thank you for helping us + keep the docs accurate and clear. + + - type: input + id: page + attributes: + label: Page or file + description: URL or file path of the documentation page with the issue. + placeholder: "https://zenzic.pythonwoods.dev/usage/advanced/ or docs/usage/advanced.md" + validations: + required: true + + - type: dropdown + id: issue_type + attributes: + label: Type of documentation issue + options: + - Incorrect information + - Missing information + - Outdated information + - Unclear or confusing explanation + - Broken link or missing asset + - Typo or formatting problem + - Missing Italian (it/) translation parity + - Other + multiple: false + validations: + required: true + + - type: textarea + id: description + attributes: + label: Description + description: What is wrong or missing? Quote the specific text if relevant. 
+ validations: + required: true + + - type: textarea + id: suggestion + attributes: + label: Suggested fix + description: > + Optional — if you know what the correct content should be, describe it here. + You can also open a pull request directly. + validations: + required: false + + - type: checkboxes + id: checklist + attributes: + label: Pre-submission checklist + options: + - label: I have checked the latest published version of the docs at zenzic.pythonwoods.dev. + required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..5398e9d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,85 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 + +name: Feature Request +description: Propose a new check, adapter, Shield pattern, or CLI capability. +title: "feat: " +labels: ["enhancement", "triage"] +body: + - type: markdown + attributes: + value: | + Thank you for proposing a feature. Please use this request to discuss the idea on the + [issue tracker](https://github.com/PythonWoods/zenzic/issues) first, so we can agree on + the approach before you invest time implementing it. + + - type: dropdown + id: category + attributes: + label: Feature category + options: + - New check (link, orphan, snippet, placeholder, asset, references) + - New engine adapter (e.g. Hugo, Docusaurus) + - Shield — new credential family + - CLI command or flag + - Configuration option (zenzic.toml field) + - Custom rules DSL extension + - Performance improvement + - Developer / API + - Other + multiple: false + validations: + required: true + + - type: textarea + id: problem + attributes: + label: Problem to solve + description: What gap does this feature fill? What currently breaks or is missing? + validations: + required: true + + - type: textarea + id: proposal + attributes: + label: Proposed solution + description: | + Describe the feature in concrete terms. 
If proposing a new adapter, describe the + entry-point registration, the `BaseAdapter` methods it must implement, and how + engine-specific config will be read. + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: What other approaches did you consider, and why did you rule them out? + validations: + required: false + + - type: checkboxes + id: pillars + attributes: + label: Zenzic design pillars + description: > + Zenzic's Core has three non-negotiable constraints. Please confirm your proposal + respects all that apply. + options: + - label: > + **Source-first** — the feature operates on raw source files; it does not require + a documentation engine to be installed or executed. + - label: > + **No subprocesses** — all logic uses pure Python or stdlib; no `subprocess.run` + calls in the linting path. + - label: > + **Pure functions** — core validation logic is deterministic, side-effect-free, + and testable without I/O. + + - type: checkboxes + id: checklist + attributes: + label: Pre-submission checklist + options: + - label: I have searched existing issues and this is not already proposed. + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..c6db305 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,59 @@ + + + + +## Description + + + +Closes # + +## Type of change + +- [ ] Bug fix +- [ ] New feature (new adapter, check, Shield pattern, CLI flag) +- [ ] Documentation update +- [ ] Performance improvement +- [ ] Refactor (no behaviour change) +- [ ] Test coverage + +--- + +## The Zenzic Way — mandatory checklist + +Zenzic's Core is built on three non-negotiable design pillars. Every PR that touches `src/` +must satisfy all that apply. + +### 1. 
Source-first + +- [ ] This change operates on **raw source files** only — it does not call `mkdocs build`, + import a documentation framework, or depend on generated HTML or build artefacts. + +### 2. No subprocesses + +- [ ] No `subprocess.run`, `os.system`, or equivalent shell calls have been added to the + linting path (`src/zenzic/core/`). +- [ ] Any new parsers run in-process in pure Python, using the stdlib or an already-declared + dependency (e.g. `tomllib`, `json`, `yaml.safe_load`, `compile()`). + +### 3. Pure functions + +- [ ] Core validation logic is **deterministic and side-effect-free**: no file I/O, no + network access, no global state mutations inside pure functions. +- [ ] I/O is confined to CLI wrappers and scanner edges, not to validator or checker modules. + +--- + +## Quality gates + +- [ ] `nox -s tests` passes (all existing tests green, coverage ≥ 80%). +- [ ] New behaviour is covered by tests — happy path and at least one failure case. +- [ ] `nox -s lint` and `nox -s typecheck` pass (`ruff check` + `mypy --strict`). +- [ ] `nox -s preflight` passes end-to-end (includes `zenzic check all --strict` self-dogfood). +- [ ] REUSE/SPDX headers are present on every new file (`nox -s reuse`). + +--- + +## Notes for reviewers + +  diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff7bb3b..ebea117 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,16 +9,20 @@ on: paths: - 'src/**' - 'tests/**' + - 'docs/**' - 'pyproject.toml' - 'uv.lock' + - 'noxfile.py' - '.github/workflows/ci.yml' pull_request: branches: [ main ] paths: - 'src/**' - 'tests/**' + - 'docs/**' - 'pyproject.toml' - 'uv.lock' + - 'noxfile.py' - '.github/workflows/ci.yml' permissions: diff --git a/CHANGELOG.it.md b/CHANGELOG.it.md index 3932297..0973aa7 100644 --- a/CHANGELOG.it.md +++ b/CHANGELOG.it.md @@ -12,6 +12,95 @@ Le versioni seguono il [Versionamento Semantico][semver]. 
--- +## [0.4.0-rc3] — 2026-03-29 — Fix i18n Ancore, Snippet Multilingua & Shield Deep-Scan + +> **Sprint 7.** Il gap di fallback i18n per `AnchorMissing` è chiuso. Codice morto eliminato. +> Utility condivisa per la rimappatura dei percorsi locale estratta. Visual Snippets per i +> rilevamenti delle regole custom. Documentazione usage suddivisa in tre pagine dedicate. +> Schema JSON stabilizzato a 7 chiavi. Validazione snippet multilingua (Python/YAML/JSON/TOML) +> e Shield deep-scan sull'intero file aggiunti. + +### Aggiunto + +- **Validazione snippet multilingua** — `check_snippet_content` valida ora i blocchi di codice + delimitati per quattro linguaggi usando parser puri in Python (nessun sottoprocesso): + `python`/`py` → `compile()`; `yaml`/`yml` → `yaml.safe_load()`; `json` → `json.loads()`; + `toml` → `tomllib.loads()`. I blocchi con tag di linguaggio non supportati (es. `bash`) vengono + silenziosamente saltati. `_extract_python_blocks` rinominato in `_extract_code_blocks`. + +- **Shield deep-scan — credenziali nei blocchi delimitati** — Lo scanner di credenziali opera + ora su ogni riga del file sorgente, incluse le righe nei blocchi di codice delimitati (con o + senza etichetta). In precedenza `_iter_content_lines` alimentava sia lo Shield che l'harvester + dei riferimenti, rendendo il contenuto nei fence invisibile allo Shield. Un nuovo generatore + `_skip_frontmatter` fornisce un flusso grezzo di righe (solo senza frontmatter); `harvest()` + esegue ora due pass indipendenti — Shield sul flusso grezzo, ref-def + alt-text sul flusso + filtrato dei contenuti. Link e definizioni di riferimento nei blocchi delimitati rimangono + ignorati per prevenire falsi positivi. 
+ +- **Shield esteso a 7 famiglie di credenziali** — Aggiunte chiavi live Stripe + (`sk_live_[0-9a-zA-Z]{24}`), token Slack (`xox[baprs]-[0-9a-zA-Z]{10,48}`), chiavi API + Google (`AIza[0-9A-Za-z\-_]{35}`) e chiavi private PEM generiche + (`-----BEGIN [A-Z ]+ PRIVATE KEY-----`) in `core/shield.py`. + +- **Metodo `resolve_anchor()` nel protocollo `BaseAdapter`** — Nuovo metodo adapter che + restituisce `True` quando un anchor miss su un file locale deve essere soppresso perché + l'ancora esiste nel file equivalente della locale di default. Implementato in + `MkDocsAdapter`, `ZensicalAdapter` (tramite `remap_to_default_locale()`) e `VanillaAdapter` + (restituisce sempre `False`). + +- **`adapters/_utils.py` — utility pura `remap_to_default_locale()`** — Estrae la logica di + rimappatura dei percorsi locale che era duplicata indipendentemente in `resolve_asset()` e + `is_shadow_of_nav_page()` in entrambi gli adapter. Funzione pura: riceve + `(abs_path, docs_root, locale_dirs)`, restituisce il `Path` equivalente nella locale di + default o `None`. Nessun I/O. + +- **Visual Snippets per i rilevamenti `[[custom_rules]]`** — Le violazioni delle regole custom + mostrano ora la riga sorgente incriminata sotto l'intestazione del rilevamento, preceduta + dall'indicatore `│` nel colore della severity del rilevamento. I rilevamenti standard non + sono interessati. + +- **`strict` e `exit_zero` come campi di `zenzic.toml`** — Entrambi i flag sono ora campi + di prima classe in `ZenzicConfig` (tipo `bool | None`, sentinella `None` = non impostato). + I flag CLI sovrascrivono i valori TOML. Abilita default a livello di progetto. + +- **Schema output JSON — 7 chiavi stabili** — `--format json` emette: + `links`, `orphans`, `snippets`, `placeholders`, `unused_assets`, `references`, `nav_contract`. 
+ +- **Suddivisione documentazione usage** — `docs/usage/index.md` suddivisa in tre pagine + dedicate: `usage/index.md` (install + workflow), `usage/commands.md` (riferimento CLI), + `usage/advanced.md` (pipeline tre-pass, Shield, API programmatica, multilingua). + Mirror italiani (`docs/it/usage/`) a piena parità. Nav `mkdocs.yml` aggiornata. + +### Risolto + +- **`AnchorMissing` non aveva la soppressione tramite fallback i18n** — Il ramo `AnchorMissing` + in `validate_links_async` riportava incondizionatamente. I link a intestazioni tradotte in + file locale generavano falsi positivi. Fix: il ramo `AnchorMissing` ora chiama + `adapter.resolve_anchor()`. Cinque nuovi test di integrazione in `TestI18nFallbackIntegration`. + +### Rimosso + +- **`_should_suppress_via_i18n_fallback()`** — Codice morto. Era definita in `validator.py` + ma non veniva mai chiamata. Rimossa permanentemente. +- **`I18nFallbackConfig` NamedTuple** — Struttura dati interna per la funzione eliminata. + Rimossa. +- **`_I18N_FALLBACK_DISABLED`** — Costante sentinella per la funzione eliminata. Rimossa. +- **`_extract_i18n_fallback_config()`** — Anch'essa codice morto. Era testata da + `TestI18nFallbackConfig` (6 test), anch'essa rimossa. Totale: ~118 righe da `validator.py`. + +### Test + +- 5 nuovi test di integrazione anchor fallback in `TestI18nFallbackIntegration`. +- `TestI18nFallbackConfig` (6 test per le funzioni eliminate) rimossa. +- 8 nuovi test di validazione snippet (YAML valido/non valido, alias `yml`, JSON valido/non + valido, accuratezza numero di riga JSON, TOML valido/non valido). +- 5 nuovi test Shield deep-scan: segreto in fence senza etichetta, segreto in fence `bash`, + segreto in fence senza creazione ref-def, blocco codice pulito senza findings, invariante combinato. +- **446 test passano.** `nox preflight` — tutti i gate verdi: ruff ✓ mypy ✓ pytest ✓ + reuse ✓ mkdocs build --strict ✓ zenzic check all --strict ✓. 
+ +--- + ## [0.4.0-rc2] — 2026-03-28 — Il Grande Disaccoppiamento > **Sprint 6.** Zenzic cessa di possedere i propri adapter. Gli adapter di terze parti si diff --git a/CHANGELOG.md b/CHANGELOG.md index aa13bab..2e7cb78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,92 @@ Versions follow [Semantic Versioning](https://semver.org/). --- +## [0.4.0-rc3] — 2026-03-29 — i18n Anchor Fix, Multi-language Snippets & Shield Deep-Scan + +> **Sprint 7.** The `AnchorMissing` i18n fallback gap closed. Dead code eliminated. Shared +> locale path-remapping utility extracted. Visual Snippets for custom rule findings. Usage docs +> split into three focused pages. JSON schema stabilised at 7 keys. Multi-language snippet +> validation (Python/YAML/JSON/TOML) and full-file Shield deep-scan added. + +### Added + +- **Multi-language snippet validation** — `check_snippet_content` now validates fenced code + blocks for four languages using pure Python parsers (no subprocesses): + `python`/`py` → `compile()`; `yaml`/`yml` → `yaml.safe_load()`; `json` → `json.loads()`; + `toml` → `tomllib.loads()`. Blocks with unsupported language tags (e.g. `bash`) are silently + skipped. `_extract_python_blocks` renamed to `_extract_code_blocks` to reflect the broader + scope. + +- **Shield deep-scan — credentials in fenced blocks** — The credential scanner now operates on + every line of the source file, including lines inside fenced code blocks (labelled or + unlabelled). Previously, `_iter_content_lines` fed both the Shield and the reference harvester, + causing fenced content to be invisible to the Shield. A new `_skip_frontmatter` generator + provides a raw line stream (minus frontmatter only); `harvest()` now runs two independent + passes — Shield on the raw stream, ref-defs + alt-text on the filtered content stream. Links + and reference definitions inside fenced blocks remain ignored to prevent false positives. 
+ +- **Shield extended to 7 credential families** — Added Stripe live keys + (`sk_live_[0-9a-zA-Z]{24}`), Slack tokens (`xox[baprs]-[0-9a-zA-Z]{10,48}`), Google API + keys (`AIza[0-9A-Za-z\-_]{35}`), and generic PEM private keys + (`-----BEGIN [A-Z ]+ PRIVATE KEY-----`) to `_SECRETS` in `core/shield.py`. + +- **`resolve_anchor()` method on `BaseAdapter` protocol** — New adapter method that returns + `True` when an anchor miss on a locale file should be suppressed because the anchor exists + in the default-locale equivalent. Implemented in `MkDocsAdapter`, `ZensicalAdapter` (via + `remap_to_default_locale()`), and `VanillaAdapter` (always returns `False`). + +- **`adapters/_utils.py` — `remap_to_default_locale()` pure utility** — Extracts the shared + locale path-remapping logic that was independently duplicated across `resolve_asset()` and + `is_shadow_of_nav_page()` in both adapters. Pure function: takes `(abs_path, docs_root, + locale_dirs)`, returns the default-locale equivalent `Path` or `None`. Zero I/O. + +- **Visual Snippets for `[[custom_rules]]` findings** — Custom rule violations now display the + offending source line below the finding header, prefixed with the `│` indicator rendered in + the finding's severity colour. Standard check findings are unaffected. + +- **`strict` and `exit_zero` as `zenzic.toml` fields** — Both flags are now first-class + `ZenzicConfig` fields (type `bool | None`, sentinel `None` = not set). CLI flags override + TOML values. Enables project-level defaults without CLI ceremony. + +- **JSON output schema — 7 stable keys** — `--format json` emits: + `links`, `orphans`, `snippets`, `placeholders`, `unused_assets`, `references`, `nav_contract`. + +- **Usage docs split** — `docs/usage/index.md` split into three focused pages: + `usage/index.md` (install + workflow), `usage/commands.md` (CLI reference), + `usage/advanced.md` (three-pass pipeline, Shield, programmatic API, multi-language). 
+ Italian mirrors (`docs/it/usage/`) at full parity. `mkdocs.yml` nav updated. + +### Fixed + +- **`AnchorMissing` had no i18n fallback suppression** — The `AnchorMissing` branch in + `validate_links_async` reported unconditionally. Links to translated headings in locale files + generated false positives. Fix: `AnchorMissing` branch now calls `adapter.resolve_anchor()`. + Five new integration tests in `TestI18nFallbackIntegration` cover: suppressed miss, miss in + both locales, fallback disabled, EN source file, direct resolution. + +### Removed + +- **`_should_suppress_via_i18n_fallback()`** — Dead code. Was defined in `validator.py` but + never called. Removed permanently. +- **`I18nFallbackConfig` NamedTuple** — Internal data structure for the above deleted function. + Removed. +- **`_I18N_FALLBACK_DISABLED` sentinel** — Constant for the above deleted function. Removed. +- **`_extract_i18n_fallback_config()`** — Also dead. Was tested by `TestI18nFallbackConfig` + (6 tests), which is also removed. Total removal: ~118 lines from `validator.py`. + +### Tests + +- 5 new anchor fallback integration tests in `TestI18nFallbackIntegration`. +- `TestI18nFallbackConfig` (6 tests for deleted functions) removed. +- 8 new snippet validation tests (YAML valid/invalid, `yml` alias, JSON valid/invalid, + JSON line-number accuracy, TOML valid/invalid). +- 5 new Shield deep-scan tests: secret in unlabelled fence, secret in `bash` fence, + secret in fence with no ref-def created, clean code block no findings, combined invariant. +- **446 tests pass.** `nox preflight` — all gates green: ruff ✓ mypy ✓ pytest ✓ reuse ✓ + mkdocs build --strict ✓ zenzic check all --strict ✓. + +--- + ## [0.4.0-rc2] — 2026-03-28 — The Great Decoupling > **Sprint 6.** Zenzic ceases to own its adapters. 
Third-party adapters install as Python diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..f5d3a9b --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 + +cff-version: 1.2.0 +message: "If you use Zenzic in your research or documentation pipelines, please cite it as follows." +type: software +authors: + - name: "PythonWoods" + email: "dev@pythonwoods.dev" +title: "Zenzic: The Agnostic Documentation Integrity Framework" +abstract: > + A high-performance, engine-agnostic framework for Markdown documentation integrity. + Zenzic validates links, orphan pages, code snippets (Python, YAML, JSON, TOML), placeholder + content, and unreferenced assets while enforcing security through a dedicated credential + scanner (the Shield). Built on pure functional principles in Python 3.11+, it operates + source-first — no build framework required — and integrates with any Markdown-based + documentation system via a plugin adapter protocol. +version: 0.4.0rc3 +date-released: 2026-03-29 +url: "https://zenzic.pythonwoods.dev/" +repository-code: "https://github.com/PythonWoods/zenzic" +license: Apache-2.0 +keywords: + - documentation + - linter + - markdown + - link-checker + - security + - yaml + - mkdocs + - ci-cd diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index dc18683..ebcde2f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -3,31 +3,84 @@ SPDX-FileCopyrightText: 2026 PythonWoods SPDX-License-Identifier: Apache-2.0 --> -# Code of Conduct +# Contributor Covenant Code of Conduct -We are committed to a welcoming, safe, and respectful community. +Zenzic adopts the [Contributor Covenant 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html) +as its standard for community interaction. We are committed to providing a welcoming, +respectful, and inclusive environment for all. -## Expected Behavior +## Our Pledge -- Be respectful and inclusive. 
-- Assume good intent; seek to understand before responding. -- Provide constructive feedback; critique code, not people. -- Follow project guidelines and security practices. +We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. -## Unacceptable Behavior +We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. -- Harassment, personal attacks, or discriminatory language. -- Publishing private information without consent. -- Disruptive or aggressive behavior in any project space. +## Our Standards -## Scope +Examples of behavior that contributes to a positive environment for our community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of 
acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. -This Code applies to all project spaces, including issues, pull requests, discussions, and any communication channel associated with Zenzic. +Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. -## Reporting +## Scope -Report incidents to: ****. Include as much detail as possible (what happened, when/where, links, screenshots if applicable). +This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. ## Enforcement -Maintainers will investigate in good faith and may take appropriate action, including warnings, temporary bans, or removal from the community. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at ****. All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 
+**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of actions. +**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. +**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. +**Consequence**: A permanent ban from any sort of public interaction within the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1. +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder]. + +[homepage]: https://www.contributor-covenant.org +[Mozilla's code of conduct enforcement ladder]: https://github.com/mozilla/diversity + +--- + +Based in Italy 🇮🇹 | Committed to the craft of Python development. 
diff --git a/NOTICE b/NOTICE index 18d248f..753bf3d 100644 --- a/NOTICE +++ b/NOTICE @@ -26,7 +26,7 @@ This product includes software developed by the PythonWoods contributors. 4. PyYAML (https://github.com/yaml/pyyaml) Copyright (c) Ingy döt Net and Kirill Simonov License: MIT - Used for: YAML configuration parsing (mkdocs.yml, zenzic.yml) + Used for: YAML configuration parsing (mkdocs.yml) 5. httpx (https://github.com/encode/httpx) Copyright (c) Encode OSS Ltd. @@ -48,7 +48,7 @@ They are not included in the distributed wheel. 7. MkDocs (https://github.com/mkdocs/mkdocs) Copyright (c) 2014-present, Tom Christie License: BSD-2-Clause - Used for: Documentation build backend (via Zensical) + Used for: Documentation build backend 8. Material for MkDocs (https://github.com/squidfunk/mkdocs-material) Copyright (c) 2016-present, Martin Donath diff --git a/README.it.md b/README.it.md index 8820ad8..313a881 100644 --- a/README.it.md +++ b/README.it.md @@ -66,7 +66,7 @@ fallback, nessuna supposizione. | --- | --- | --- | | Links | `zenzic check links` | Link interni non raggiungibili, ancore morte, **path traversal** | | Orfani | `zenzic check orphans` | File `.md` assenti dalla `nav` | -| Snippet | `zenzic check snippets` | Blocchi Python che non compilano | +| Snippet | `zenzic check snippets` | Blocchi Python, YAML, JSON e TOML con errori di sintassi | | Placeholder | `zenzic check placeholders` | Pagine stub e pattern di testo proibiti | | Asset | `zenzic check assets` | Immagini e file non referenziati da nessuna pagina | | **Riferimenti** | `zenzic check references` | Dangling References, Dead Definitions, **Zenzic Shield** | @@ -118,6 +118,21 @@ uv add --dev zenzic pip install zenzic ``` +### Rendering MkDocs — extra `zenzic[docs]` + +Il core di Zenzic non ha dipendenze: validare il Markdown grezzo richiede solo `zenzic`. +Lo stack MkDocs è necessario solo per **renderizzare** il sito, non per validarlo. 
+ +Per installare anche lo stack completo MkDocs: + +```bash +# uv +uv add --dev "zenzic[docs]" + +# pip +pip install "zenzic[docs]" +``` + --- ## Utilizzo CLI @@ -167,6 +182,11 @@ zenzic serve --port 9000 > `zenzic check references` esce con codice 2, una credenziale è stata trovata nella > documentazione. Ruotare la credenziale immediatamente. +Lo **Zenzic Shield** rileva 7 famiglie di credenziali (chiavi OpenAI, token GitHub, access key +AWS, chiavi live Stripe, token Slack, chiavi API Google e chiavi private PEM) su **ogni riga del +file sorgente** — incluse le righe dentro i blocchi di codice `bash`, `yaml` e senza etichetta. +Una credenziale in un esempio di codice è comunque una credenziale esposta. + --- ## DSL `[[custom_rules]]` @@ -221,6 +241,22 @@ non segnalare mai i file tradotti come orfani. --- +## Contribuire + +Bug report, miglioramenti alla documentazione e pull request sono benvenuti. Prima di iniziare: + +1. Apri un'issue per discutere la modifica — usa il [template appropriato](https://github.com/PythonWoods/zenzic/issues). +2. Leggi la [Guida ai Contributi](CONTRIBUTING.md) — in particolare il setup locale e la checklist **Zenzic Way**. +3. Ogni PR deve superare `nox -s preflight` e includere le intestazioni REUSE/SPDX sui nuovi file. + +Consulta anche il [Codice di Condotta](CODE_OF_CONDUCT.md) e la [Policy di Sicurezza](SECURITY.md). + +## Citare Zenzic + +Il file [`CITATION.cff`](CITATION.cff) è presente nella root del repository. GitHub lo +visualizza automaticamente — clicca **"Cite this repository"** sulla pagina del repo per +ottenere il riferimento in formato APA o BibTeX. + ## Licenza Apache-2.0 — vedi [LICENSE](LICENSE). 
diff --git a/README.md b/README.md index 1dc0ba0..c3944d1 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Zenzic provides an extensive, engineering-grade documentation portal: | --- | --- | --- | | Links | `zenzic check links` | Broken internal links, dead anchors, and **path traversal** attempts | | Orphans | `zenzic check orphans` | `.md` files absent from `nav` | -| Snippets | `zenzic check snippets` | Python code blocks that fail to compile | +| Snippets | `zenzic check snippets` | Python, YAML, JSON, and TOML blocks with syntax errors | | Placeholders | `zenzic check placeholders` | Stub pages and forbidden text patterns | | Assets | `zenzic check assets` | Images and files not referenced anywhere | | **References** | `zenzic check references` | Dangling References, Dead Definitions, **Zenzic Shield** | @@ -267,8 +267,23 @@ source .venv/bin/activate # Windows: .venv\Scripts\activate pip install zenzic ``` +### MkDocs rendering — `zenzic[docs]` extra + +Zenzic's core does not depend on MkDocs: linting raw Markdown requires nothing beyond `zenzic` itself. +The MkDocs stack is only needed to **render** your site, not to validate it. + +If you use MkDocs and also want the full build stack available: + +```bash +# uv +uv add --dev "zenzic[docs]" + +# pip +pip install "zenzic[docs]" +``` + > **Note:** -> All six checks — including `check links --strict` and `check references` — work on raw Markdown +> All seven checks — including `check links --strict` and `check references` — work on raw Markdown > source files via a native Python parser and `httpx`. **No MkDocs or Zensical installation is required** > for `check`, `score`, or `diff`. > @@ -370,13 +385,21 @@ Build aborted. Rotate the exposed credential immediately. 1. The Shield runs *inside* Pass 1 — before Pass 2 validates links and before any HTTP ping is issued. A document containing a leaked credential is never used to make outbound requests. 2. 
Patterns use exact-length quantifiers (`{48}`, `{36}`, `{16}`) — no backtracking, O(1) per line. -3. Three credential families are covered out of the box: +3. Seven credential families are covered out of the box: | Type | Pattern | | --- | --- | | OpenAI API key | `sk-[a-zA-Z0-9]{48}` | | GitHub token | `gh[pousr]_[a-zA-Z0-9]{36}` | | AWS access key | `AKIA[0-9A-Z]{16}` | +| Stripe live key | `sk_live_[0-9a-zA-Z]{24}` | +| Slack token | `xox[baprs]-[0-9a-zA-Z]{10,48}` | +| Google API key | `AIza[0-9A-Za-z\-_]{35}` | +| PEM private key | `-----BEGIN [A-Z ]+ PRIVATE KEY-----` | + +4. **No blind spots** — the Shield scans every line of the source file, including lines inside + fenced code blocks (`bash`, `yaml`, unlabelled, etc.). A credential committed inside a code + example is still a committed credential. > **Tip:** > Add `zenzic check references` to your pre-commit hooks to catch leaked credentials before they @@ -458,6 +481,21 @@ nox -s preflight # zenzic check all (self-check) --- +## Contributing + +We welcome bug reports, documentation improvements, and pull requests. Before you start: + +1. Open an issue to discuss the change — use the [bug report][issues], [feature request][issues], or [docs issue][issues] template. +2. Read the [Contributing Guide][contributing] — especially the **Local development setup** and the **Zenzic Way** checklist (pure functions, no subprocesses, source-first). +3. Every PR must pass `nox -s preflight` (tests + lint + typecheck + self-dogfood) and include REUSE/SPDX headers on new files. + +Please also review our [Code of Conduct][coc] and [Security Policy][security]. + +## Citing Zenzic + +A [`CITATION.cff`][citation-cff] file is present at the root of the repository. GitHub renders +it automatically — click **"Cite this repository"** on the repo page for APA or BibTeX output. + ## License Apache-2.0 — see [LICENSE][license]. @@ -484,3 +522,7 @@ Apache-2.0 — see [LICENSE][license]. 
[ci-workflow]: .github/workflows/zenzic.yml [contributing]: CONTRIBUTING.md [license]: LICENSE +[citation-cff]: CITATION.cff +[coc]: CODE_OF_CONDUCT.md +[security]: SECURITY.md +[issues]: https://github.com/PythonWoods/zenzic/issues diff --git a/RELEASE.it.md b/RELEASE.it.md index 67aab94..b3f02e3 100644 --- a/RELEASE.it.md +++ b/RELEASE.it.md @@ -3,8 +3,8 @@ # Zenzic v0.4.0: Il Framework Agnostico per l'Integrità della Documentazione -**Data di rilascio:** 2026-03-28 -**Stato:** Release Candidate 2 — pronto per la distribuzione +**Data di rilascio:** 2026-03-29 +**Stato:** Release Candidate 3 — pronto per la distribuzione --- @@ -89,6 +89,173 @@ regressioni. --- +## Cosa è cambiato in rc3 + +### Fix i18n Ancore — AnchorMissing ora ha la soppressione tramite fallback i18n + +`AnchorMissing` ora partecipa alla stessa logica di fallback i18n di `FileNotFound`. In +precedenza, un link come `[testo](it/pagina.md#intestazione)` generava un falso positivo quando +la pagina italiana esisteva ma la sua intestazione era tradotta — perché il ramo `AnchorMissing` +in `validate_links_async` non aveva nessun percorso di soppressione. `_should_suppress_via_i18n_fallback()` +era definita ma non veniva mai chiamata. + +**Fix:** nuovo metodo `resolve_anchor()` aggiunto al protocollo `BaseAdapter` e a tutti e tre +gli adapter (`MkDocsAdapter`, `ZensicalAdapter`, `VanillaAdapter`). Quando un'ancora non è +trovata in un file locale, `resolve_anchor()` verifica se l'ancora esiste nel file equivalente +nella locale di default tramite l'`anchors_cache` già in memoria. Nessun I/O su disco +aggiuntivo. + +### Utility condivisa — `remap_to_default_locale()` + +La logica di rimappatura dei percorsi locale che era duplicata indipendentemente in `resolve_asset()` +e `is_shadow_of_nav_page()` è ora una singola funzione pura in +`src/zenzic/core/adapters/_utils.py`. `resolve_asset()`, `resolve_anchor()` e +`is_shadow_of_nav_page()` in entrambi `MkDocsAdapter` e `ZensicalAdapter` vi delegano. 
+`_should_suppress_via_i18n_fallback()`, `I18nFallbackConfig`, `_I18N_FALLBACK_DISABLED` e +`_extract_i18n_fallback_config()` — 118 righe di codice morto — sono eliminati +permanentemente da `validator.py`. + +### Visual Snippets per i rilevamenti delle regole custom + +Le violazioni delle regole custom (`[[custom_rules]]` da `zenzic.toml`) mostrano ora la riga +sorgente incriminata sotto l'intestazione del rilevamento: + +```text +[ZZ-NODRAFT] docs/guide/install.md:14 — Remove DRAFT marker before publishing. + │ > DRAFT: section under construction +``` + +L'indicatore `│` è visualizzato nel colore della severity del rilevamento. I rilevamenti +standard (link non validi, orfani, ecc.) non sono interessati. + +### Schema JSON — 7 chiavi + +L'output `--format json` emette ora uno schema stabile a 7 chiavi: +`links`, `orphans`, `snippets`, `placeholders`, `unused_assets`, `references`, `nav_contract`. + +### `strict` e `exit_zero` come campi di `zenzic.toml` + +Entrambi i flag possono ora essere dichiarati in `zenzic.toml` come default a livello di progetto: + +```toml +strict = true # equivalente a passare sempre --strict +exit_zero = false # impostare a true per uscire con codice 0 anche con rilevamenti (soft-gate CI) +``` + +I flag CLI continuano a sovrascrivere i valori TOML. + +### Suddivisione documentazione usage — tre pagine dedicate + +`docs/usage/index.md` era una pagina monolitica di 580 righe che copriva install, comandi, +CI/CD, punteggio, funzionalità avanzate e API programmatica. Suddivisa in tre pagine dedicate: + +- `usage/index.md` — Opzioni di installazione, workflow init→config→check, modalità engine +- `usage/commands.md` — Comandi CLI, flag, codici di uscita, output JSON, punteggio qualità +- `usage/advanced.md` — Pipeline tre-pass, Zenzic Shield, alt-text, API programmatica, + documentazione multilingua + +I mirror italiani (`it/usage/`) aggiornati con piena parità. 
+ +### Validazione snippet multilingua + +`zenzic check snippets` valida ora quattro linguaggi usando parser puri in Python — nessun +sottoprocesso per nessun linguaggio. Python usa `compile()`, YAML usa `yaml.safe_load()`, JSON +usa `json.loads()` e TOML usa `tomllib.loads()` (stdlib Python 3.11+). I blocchi con tag di +linguaggio non supportati (`bash`, `javascript`, `mermaid`, ecc.) vengono trattati come testo +semplice e non controllati sintatticamente. + +### Shield deep-scan — nessun punto cieco + +Lo scanner di credenziali opera ora su ogni riga del file sorgente, incluse le righe dentro i +blocchi di codice delimitati. Una credenziale committata in un esempio `bash` è comunque una +credenziale committata — Zenzic la troverà. Il validatore di link e riferimenti continua a +ignorare il contenuto dei blocchi delimitati per prevenire falsi positivi dagli URL di esempio +illustrativi. + +Lo Shield copre ora sette famiglie di credenziali: chiavi API OpenAI, token GitHub, access key +AWS, chiavi live Stripe, token Slack, chiavi API Google e chiavi private PEM generiche. + +--- + +## Packaging Professionale & PEP 735 + +La rc3 adotta gli ultimi standard di packaging Python end-to-end, rendendo Zenzic più leggero +per gli utenti finali e misurabilmente più veloce in CI. + +### Core install snello + +`pip install zenzic` installa ora solo le cinque dipendenze runtime (`typer`, `rich`, +`pyyaml`, `pydantic`, `httpx`). L'intero stack MkDocs — precedente effetto collaterale +transitivo del gruppo dev monolitico — non viene più incluso a meno di una richiesta esplicita: + +```bash +pip install "zenzic[docs]" # MkDocs Material + mkdocstrings + plugin +``` + +Per la grande maggioranza degli utenti (siti Hugo, progetti Zensical, wiki Markdown semplici, +pipeline CI) questo significa un'installazione ~60% più piccola e tempi di cold-start +proporzionalmente più veloci sui runner CI effimeri. 
+ +### PEP 735 — gruppi di dipendenze atomici + +Le dipendenze di sviluppo sono dichiarate come [gruppi PEP 735](https://peps.python.org/pep-0735/) +in `pyproject.toml`, gestiti da `uv`: + +| Gruppo | Scopo | Job CI | +| :----- | :---- | :----- | +| `test` | pytest + coverage | Matrix `quality` (3.11 / 3.12 / 3.13) | +| `lint` | ruff + mypy + pre-commit + reuse | Matrix `quality` | +| `docs` | Stack MkDocs | Job `docs` | +| `release` | nox + bump-my-version + pip-audit | Job `security` | +| `dev` | Tutti i precedenti (sviluppo locale) | — | + +Ogni job CI sincronizza solo il gruppo di cui ha bisogno. Il job `quality` non installa mai +lo stack MkDocs. Il job `docs` non installa mai pytest. Questo elimina il tempo di installazione +sprecato per pacchetti inutilizzati e riduce la superficie di potenziali conflitti tra job. +Combinato con la cache `uv` in GitHub Actions, le run CI successive ripristinano l'intero +ambiente in meno di 3 secondi. + +### `CITATION.cff` + +Il file [`CITATION.cff`](CITATION.cff) (formato CFF 1.2.0) è ora presente nella root del +repository. GitHub lo visualizza automaticamente come pulsante "Cite this repository". Zenodo, +Zotero e altri gestori di riferimenti bibliografici che supportano il formato possono +importarlo direttamente. + +--- + +## Il Firewall per la Documentazione + +La rc3 completa uno spostamento strategico in ciò che Zenzic è. Ha iniziato come un link checker. +È diventato un linter engine-agnostic. Con la rc3 diventa un **Firewall per la Documentazione** +— un unico gate che applica correttezza, completezza e sicurezza simultaneamente. + +Le tre dimensioni del firewall: + +**1. Correttezza** — Zenzic valida la sintassi di ogni blocco di dati strutturati nella tua +documentazione. I tuoi esempi YAML per Kubernetes, i tuoi frammenti JSON OpenAPI, i tuoi snippet +di configurazione TOML — se pubblichi esempi di configurazione errati, i tuoi utenti copieranno +configurazioni errate. 
`check snippets` rileva questo prima che raggiunga la produzione, usando +gli stessi parser che utilizzeranno gli utenti. + +**2. Completezza** — Il rilevamento degli orfani, la scansione dei placeholder e il gate +`fail_under` garantiscono che ogni pagina linkata nella nav esista, contenga contenuto reale e +ottenga un punteggio superiore alla soglia concordata dal team. Una documentazione non è "finita" +quando tutte le pagine esistono — è finita quando tutte le pagine sono complete. + +**3. Sicurezza** — Lo Shield scansiona ogni riga di ogni file, inclusi i blocchi di codice, per +sette famiglie di credenziali esposte. Nessun blocco, nessuna etichetta, nessuna annotazione può +nascondere un segreto a Zenzic. Il contratto del codice di uscita 2 è non negoziabile e non +sopprimibile: un segreto nella documentazione è un incidente bloccante per il build, non un +warning. + +Questo è il significato di "Firewall per la Documentazione": non uno strumento che si esegue una +volta prima di un rilascio, ma un gate che gira ad ogni commit, applica tre dimensioni di qualità +simultaneamente e esce con un codice machine-readable che la pipeline CI può interpretare senza +intervento umano. + +--- + ## Il Grande Disaccoppiamento (v0.4.0-rc2) La novità principale di questo rilascio è il sistema di **Scoperta Dinamica degli Adapter**. In @@ -222,11 +389,12 @@ Il flag CLI `--format` è invariato. Il parametro interno `format` nelle API Pyt ## Verifica e Checksum ```text -zenzic check all # self-dogfood: 6/6 OK -pytest # 433 passati, 0 falliti in 2.47s -coverage # 98.4% line coverage -ruff check . # 0 violazioni -mypy src/ # 0 errori +zenzic check all # self-dogfood: 7/7 OK +pytest # 446 passati, 0 falliti +coverage # ≥ 80% (gate rigido) +ruff check . 
# 0 violazioni +mypy src/ # 0 errori +mkdocs build --strict # 0 avvertimenti ``` --- diff --git a/RELEASE.md b/RELEASE.md index c2ce5e0..d7d86a0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -3,8 +3,8 @@ # Zenzic v0.4.0: The Agnostic Framework for Documentation Integrity -**Release date:** 2026-03-28 -**Status:** Release Candidate 2 — ready for shipment +**Release date:** 2026-03-29 +**Status:** Release Candidate 3 — ready for shipment --- @@ -58,6 +58,163 @@ and regression detection are built. --- +## What Changed in rc3 + +### i18n Anchor Fix — AnchorMissing now has i18n fallback suppression + +`AnchorMissing` now participates in the same i18n fallback logic as `FileNotFound`. Previously, +a link like `[text](it/page.md#heading)` would fire a false positive when the Italian page existed +but its heading was translated — because the `AnchorMissing` branch in `validate_links_async` had +no suppression path. `_should_suppress_via_i18n_fallback()` was defined but never called. + +**Fix:** new `resolve_anchor()` method added to `BaseAdapter` protocol and all three adapters +(`MkDocsAdapter`, `ZensicalAdapter`, `VanillaAdapter`). When an anchor is not found in a locale +file, `resolve_anchor()` checks whether the anchor exists in the default-locale equivalent via +the `anchors_cache` already in memory. No additional disk I/O. + +### Shared utility — `remap_to_default_locale()` + +The locale path-remapping logic that was independently duplicated in `resolve_asset()` and +`is_shadow_of_nav_page()` is now a single pure function in `src/zenzic/core/adapters/_utils.py`. +`resolve_asset()`, `resolve_anchor()`, and `is_shadow_of_nav_page()` in both `MkDocsAdapter` and +`ZensicalAdapter` all delegate to it. `_should_suppress_via_i18n_fallback()`, `I18nFallbackConfig`, +`_I18N_FALLBACK_DISABLED`, and `_extract_i18n_fallback_config()` — 118 lines of dead code — +are permanently removed from `validator.py`. 
+ +### Visual Snippets for custom rule findings + +Custom rule violations (`[[custom_rules]]` from `zenzic.toml`) now display the offending source +line below the finding header: + +```text +[ZZ-NODRAFT] docs/guide/install.md:14 — Remove DRAFT marker before publishing. + │ > DRAFT: section under construction +``` + +The `│` indicator is rendered in the finding's severity colour. Standard findings (broken links, +orphans, etc.) are unaffected. + +### JSON schema — 7 keys + +`--format json` output now emits a stable 7-key schema: +`links`, `orphans`, `snippets`, `placeholders`, `unused_assets`, `references`, `nav_contract`. + +### `strict` and `exit_zero` as `zenzic.toml` fields + +Both flags can now be declared in `zenzic.toml` as project-level defaults: + +```toml +strict = true # equivalent to always passing --strict +exit_zero = false # set to true to exit with code 0 even on findings (CI soft-gate) +``` + +CLI flags continue to override the TOML values. + +### Usage docs split — three focused pages + +`docs/usage/index.md` was a monolithic 580-line page covering install, commands, CI/CD, scoring, +advanced features, and programmatic API. Split into three focused pages: + +- `usage/index.md` — Install options, init→config→check workflow, engine modes +- `usage/commands.md` — CLI commands, flags, exit codes, JSON output, quality score +- `usage/advanced.md` — Three-pass pipeline, Zenzic Shield, alt-text, programmatic API, + multi-language docs + +Italian mirrors (`it/usage/`) updated in full parity. + +### Multi-language snippet validation + +`zenzic check snippets` now validates four languages using pure Python parsers — no subprocesses +for any language. Python uses `compile()`, YAML uses `yaml.safe_load()`, JSON uses `json.loads()`, +and TOML uses `tomllib.loads()` (Python 3.11+ stdlib). Blocks with unsupported language tags +(`bash`, `javascript`, `mermaid`, etc.) are treated as plain text and not syntax-checked. 
+ +### Shield deep-scan — no more blind spots + +The credential scanner now operates on every line of the source file, including lines inside +fenced code blocks. A credential committed in a `bash` example is still a committed credential — +Zenzic will find it. The link and reference validators continue to ignore fenced block content to +prevent false positives from illustrative example URLs. + +The Shield now covers seven credential families: OpenAI API keys, GitHub tokens, AWS access keys, +Stripe live keys, Slack tokens, Google API keys, and generic PEM private keys. + +--- + +## Professional Packaging & PEP 735 + +v0.4.0-rc3 adopts the latest Python packaging standards end-to-end, making Zenzic lighter for +end users and measurably faster in CI. + +### Lean core install + +`pip install zenzic` now installs only the five runtime dependencies (`typer`, `rich`, +`pyyaml`, `pydantic`, `httpx`). The entire MkDocs stack — previously a transitive side-effect +of the monolithic dev group — is no longer pulled in unless explicitly requested: + +```bash +pip install "zenzic[docs]" # MkDocs Material + mkdocstrings + plugins +``` + +For the vast majority of users (Hugo sites, Zensical projects, plain Markdown wikis, CI +pipelines) this means a ~60% smaller install and proportionally faster cold-start times on +ephemeral CI runners. + +### PEP 735 — atomic dependency groups + +Development dependencies are declared as [PEP 735](https://peps.python.org/pep-0735/) groups +in `pyproject.toml`, managed by `uv`: + +| Group | Purpose | CI job | +| :---- | :------ | :----- | +| `test` | pytest + coverage | `quality` matrix (3.11 / 3.12 / 3.13) | +| `lint` | ruff + mypy + pre-commit + reuse | `quality` matrix | +| `docs` | MkDocs stack | `docs` job | +| `release` | nox + bump-my-version + pip-audit | `security` job | +| `dev` | All of the above (local development) | — | + +Each CI job syncs only the group it needs. The `quality` job never installs the MkDocs stack. 
+The `docs` job never installs pytest. This eliminates install time wasted on unused packages +and reduces the surface area for dependency conflicts across jobs. Combined with the `uv` +cache in GitHub Actions, subsequent CI runs restore the full environment in under 3 seconds. + +### `CITATION.cff` + +A [`CITATION.cff`](CITATION.cff) file (CFF 1.2.0 format) is now present at the repository +root. GitHub renders it automatically as a "Cite this repository" button. Zenodo, Zotero, and +other reference managers that support the format can import it directly. + +--- + +## The Documentation Firewall + +v0.4.0-rc3 completes a strategic shift in what Zenzic is. It began as a link checker. It became +an engine-agnostic linter. With rc3, it becomes a **Documentation Firewall** — a single gate that +enforces correctness, completeness, and security simultaneously. + +The three dimensions of the firewall: + +**1. Correctness** — Zenzic validates the syntax of every structured data block in your docs. +Your Kubernetes YAML examples, your OpenAPI JSON fragments, your TOML configuration snippets — if +you ship broken config examples, your users will copy broken config. `check snippets` catches this +before it reaches production, using the same parsers your users will run. + +**2. Completeness** — Orphan detection, placeholder scanning, and the `fail_under` quality gate +ensure that every page linked in the nav exists, contains real content, and scores above the +team's agreed threshold. A documentation site is not "done" when all pages exist — it is done +when all pages are complete. + +**3. Security** — The Shield scans every line of every file, including code blocks, for seven +families of leaked credentials. No fencing, no labels, no annotations can hide a secret from +Zenzic. The exit code 2 contract is non-negotiable and non-suppressible: a secret in docs is a +build-blocking incident, not a warning. 
+ +This is what "Documentation Firewall" means: not a tool you run once before a release, but a +gate that runs on every commit, enforces three dimensions of quality simultaneously, and exits +with a machine-readable code that your CI pipeline can act on without human interpretation. + +--- + ## The Great Decoupling (v0.4.0-rc2) The headline change in this release is the **Dynamic Adapter Discovery** system. In v0.3.x, @@ -177,11 +334,12 @@ and `diff` Python APIs has been renamed to `output_format` — update any progra ## Checksums and verification ```text -zenzic check all # self-dogfood: 6/6 OK -pytest # 433 passed, 0 failed in 2.47s -coverage # 98.4% line coverage +zenzic check all # self-dogfood: 7/7 OK +pytest # 446 passed, 0 failed +coverage # ≥ 80% (hard gate) ruff check . # 0 violations mypy src/ # 0 errors +mkdocs build --strict # 0 warnings ``` --- diff --git a/SECURITY.md b/SECURITY.md index f1219db..f1e87f1 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -5,40 +5,62 @@ SPDX-License-Identifier: Apache-2.0 # Security Policy -## Reporting Security Issues +## Reporting a vulnerability -The Zenzic team takes security seriously. If you discover a vulnerability, please report it responsibly — **do not open a public issue**. 
+**Please do not open a public GitHub issue for security vulnerabilities.** -### How to Report +If you discover a security vulnerability in Zenzic — including issues with the **Shield** +credential scanner, the **path traversal** protection, or any other part of the Core — +report it privately via one of these channels: -- **GitHub Security Advisories**: Open a private advisory at [github.com/PythonWoods/zenzic/security/advisories](https://github.com/PythonWoods/zenzic/security/advisories) -- **Email**: `dev@pythonwoods.dev` with `[SECURITY]` in the subject line +- **GitHub Security Advisories** (preferred): [github.com/PythonWoods/zenzic/security/advisories](https://github.com/PythonWoods/zenzic/security/advisories) +- **Email**: `dev@pythonwoods.dev` — subject line: `[SECURITY] Zenzic — ` -### What to Include +Please include a clear description of the vulnerability, steps to reproduce, potential +impact, and a suggested fix if available. -- Clear description of the vulnerability -- Steps to reproduce -- Potential impact and scope -- Suggested remediation (if available) +We will acknowledge your report within **72 hours** and aim to release a patch within +**14 days** of confirming the issue. -## Response Process +## Scope -1. Acknowledgment within 48 hours -2. Investigation and validation -3. Fix development and testing -4. Coordinated disclosure +The following areas are in-scope for security reports: -Initial triage and plan within 72 hours. 
+| Area | Description | +| :--- | :---------- | +| **Shield bypass** | A credential pattern that passes undetected through the seven-family scanner | +| **Path traversal bypass** | A crafted link that escapes the `docs/` root without triggering `PathTraversal` | +| **Dependency CVE** | A known CVE in a runtime dependency (`typer`, `rich`, `pyyaml`, `pydantic`, `httpx`) | +| **Code execution** | A crafted Markdown or config file that causes arbitrary code execution during linting | +| **Exit code suppression** | Any method that prevents exit code `2` from being emitted on a Shield finding | -## Supported Versions +Out-of-scope: documentation content errors, cosmetic output formatting, or issues that +only affect `nox` development sessions (not the published `zenzic` package). -Only the latest release and the `main` branch receive active security updates. +## Security design notes -## Security Notes +Zenzic v0.4.0+ has **no subprocess calls** in the linting path. The tool reads raw source +files and performs all analysis in pure Python. It does not execute `mkdocs build` or any +other build tool during `check`, `score`, or `diff`. The attack surface is limited to: -Zenzic is a CLI tool that reads local files and runs `mkdocs build` as a subprocess. It does not handle credentials, network requests, or user-supplied data from external sources. The primary security surface is: +- **Crafted Markdown files** — Zenzic parses Markdown with a pure state-machine; the path + traversal shield rejects any href that resolves outside `docs/`. +- **Crafted config files** — `zenzic.toml`, `mkdocs.yml`, and `zensical.toml` are parsed + as plain data (TOML/YAML). No code is evaluated. Custom rules (`[[custom_rules]]`) are + plain regex patterns compiled once at load time. +- **Dependencies** — run `nox -s security` (pip-audit) regularly to detect known CVEs in + the dependency tree. 
-- **Subprocess execution**: `mkdocs build --strict` is invoked with the repository's own `mkdocs.yml` — only run Zenzic against repositories you trust. -- **Dependencies**: keep dependencies up to date; run `nox -s security` (pip-audit) regularly to detect known CVEs. -- **Path Traversal Protection**: Zenzic v0.3.0+ implements the Zenzic Shield inside `InMemoryPathResolver`. While Zenzic mitigates unauthorized file access via crafted Markdown links (e.g. `../../../../etc/passwd`), Zenzic is a static analysis tool; it does not replace filesystem-level permissions. -- **Path Traversal Protection**: Zenzic v0.3.0+ implements the Zenzic Shield inside `InMemoryPathResolver`. While we mitigate unauthorised file access via crafted Markdown links (e.g. `../../../../etc/passwd`-style hrefs), Zenzic is a static analysis tool; it does not replace filesystem-level permissions or OS security controls. +## Supported versions + +| Version | Support status | +| :------ | :------------- | +| `0.4.x` (current) | ✅ Security fixes backported | +| `0.3.x` | ⚠️ Critical fixes only | +| `< 0.3` | ❌ End of life — upgrade recommended | + +## Disclosure policy + +We follow a **coordinated disclosure** model. We ask that you allow up to 14 days for a +patch to be released before any public disclosure. Confirmed reporters will be credited in +the release changelog unless they prefer to remain anonymous. diff --git a/docs/checks.md b/docs/checks.md index 7de0694..3d7c2db 100644 --- a/docs/checks.md +++ b/docs/checks.md @@ -39,13 +39,13 @@ Zenzic runs six independent checks. Each addresses a distinct category of docume Media files that exist on disk but are never referenced. __Supports autofix.__ - [`check assets`](#assets)  •  [`clean assets`](usage/index.md#autofix-cleanup) + [`check assets`](#assets)  •  [`clean assets`](usage/commands.md#autofix-cleanup) - :lucide-shield-check:   __References__ Dangling reference links, dead definitions, and leaked credentials (exit code 2). 
- [`zenzic check references`](usage/index.md#reference-integrity-v020) + [`zenzic check references`](usage/advanced.md#reference-integrity-v020) @@ -138,30 +138,45 @@ __CLI:__ `zenzic check snippets` Code examples in documentation are tested less rigorously than production code. A snippet that worked when it was written may have a syntax error introduced by a refactor, a copy-paste mistake, or a manual edit that was never reviewed. Readers who copy broken code waste time debugging errors that have nothing to do with their actual problem. -`zenzic check snippets` extracts all fenced code blocks tagged `` ```python `` or `` ```py `` and compiles each one with Python's built-in `compile()` in `exec` mode. Only syntax is checked — runtime errors are not detected. +`zenzic check snippets` validates the syntax of fenced code blocks using pure-Python parsers — no subprocesses are spawned for any language. + +__Supported languages:__ + +| Language tag | Parser | What is checked | +| :--- | :--- | :--- | +| `` python ``, `` py `` | `compile()` in `exec` mode | Python 3.11+ syntax | +| `` yaml ``, `` yml `` | `yaml.safe_load()` | YAML 1.1 structure | +| `` json `` | `json.loads()` | JSON syntax | +| `` toml `` | `tomllib.loads()` (stdlib 3.11+) | TOML v1.0 syntax | + +Blocks tagged with any other language (`` bash ``, `` javascript ``, `` mermaid ``, etc.) are treated as plain text and are not syntax-checked. However, __every fenced block is still scanned by the Zenzic Shield__ for credential patterns — syntax validation and security scanning are independent. __CLI behaviour:__ walks `docs_dir`, reads each `.md` file, and calls `check_snippet_content(text, file_path, config)` on the raw content. -__Block extraction:__ Zenzic uses a deterministic line-by-line state machine rather than a regex to extract Python blocks. 
This prevents false positives from inline code spans (e.g., `` ` ```python ` `` in prose text) and is robust against `pymdownx.superfences` documents with interleaved Mermaid or other custom fences. See [Architecture — State-machine parsing](architecture.md#state-machine-parsing-and-superfences-false-positives) for details. +__Block extraction:__ Zenzic uses a deterministic line-by-line state machine rather than a regex to extract code blocks. This prevents false positives from inline code spans (e.g., `` ` ```python ` `` in prose text) and is robust against `pymdownx.superfences` documents with interleaved Mermaid or other custom fences. See [Architecture — State-machine parsing](architecture.md#state-machine-parsing-and-superfences-false-positives) for details. __What it catches:__ -- `SyntaxError` — missing colons, unmatched brackets, invalid expressions -- Parser crashes — `MemoryError`, `RecursionError` from pathological inputs +- Python: `SyntaxError` — missing colons, unmatched brackets, invalid expressions; parser crashes (`MemoryError`, `RecursionError`) +- YAML: structural errors — unclosed sequences, invalid mappings, bad indentation +- JSON: `JSONDecodeError` — trailing commas, missing quotes, unmatched brackets +- TOML: `TOMLDecodeError` — missing quotes on values, invalid key syntax, type mismatches __What it does not catch:__ - Runtime errors (`NameError`, `TypeError`, `ImportError`, etc.) — only syntax is checked - Intentionally incomplete snippets — fragments, ellipsis stubs, pseudo-code +- Bash, JavaScript, or any other language without a pure-Python parser __Tuning:__ use `snippet_min_lines` in `zenzic.toml` to skip short blocks. The default of `1` checks everything including single-line blocks. Set it to `3` or higher to ignore import stubs and one-liners that are likely illustrative rather than executable. 
__Example output:__ ```text -INVALID SNIPPETS (2): - tutorial.md:48 - SyntaxError in Python snippet — invalid syntax - api/reference.md:112 - SyntaxError in Python snippet — expected ':' +INVALID SNIPPETS (3): + tutorial.md:48 - SyntaxError in Python snippet — expected ':' + config/reference.md:22 - SyntaxError in YAML snippet — mapping values are not allowed here + api/reference.md:112 - SyntaxError in JSON snippet — Expecting property name enclosed in double quotes ``` --- @@ -184,7 +199,7 @@ __CLI behaviour:__ reads each `.md` file and calls `check_placeholder_content(te __Tuning:__ -```toml +```text # zenzic.toml # Raise the threshold for projects with dense, concise pages diff --git a/docs/ci-cd.md b/docs/ci-cd.md index 49dbb9c..4648f36 100644 --- a/docs/ci-cd.md +++ b/docs/ci-cd.md @@ -27,11 +27,13 @@ zenzic score --format json "status": "passed", "exit_code": 0, "checks": { - "links": {"status": "passed", "errors": 0}, - "orphans": {"status": "passed", "errors": 0}, - "snippets": {"status": "passed", "errors": 0}, - "placeholders": {"status": "passed", "errors": 0}, - "assets": {"status": "passed", "errors": 0} + "links": {"status": "passed", "errors": 0}, + "orphans": {"status": "passed", "errors": 0}, + "snippets": {"status": "passed", "errors": 0}, + "placeholders": {"status": "passed", "errors": 0}, + "unused_assets": {"status": "passed", "errors": 0}, + "references": {"status": "passed", "errors": 0}, + "nav_contract": {"status": "passed", "errors": 0} } } ``` diff --git a/docs/community/contribute/pull-requests.md b/docs/community/contribute/pull-requests.md index 0c79ffd..6b83ba9 100644 --- a/docs/community/contribute/pull-requests.md +++ b/docs/community/contribute/pull-requests.md @@ -13,21 +13,31 @@ effort and ensure the integrity of the codebase. ## Local development setup -Clone the repository and install Zenzic in editable mode so that your local -changes are reflected immediately without reinstalling. 
+Clone the repository and set up the full development environment in one step: + +```bash +git clone https://github.com/PythonWoods/zenzic.git +cd zenzic +nox -s dev +``` + +`nox -s dev` runs `uv sync --group dev` (installing all dependency groups — test, lint, docs, +and release tooling) and then installs the pre-commit hooks. It is the canonical one-shot +setup command; run it once after cloning. + +For a lower-level setup or if you do not have `nox` installed yet, install with `uv` directly: === ":simple-astral: uv (recommended)" ```bash git clone https://github.com/PythonWoods/zenzic.git cd zenzic - uv venv + uv sync --group dev source .venv/bin/activate # Windows: .venv\Scripts\activate - uv pip install -e . ``` - [`uv`](https://docs.astral.sh/uv/) resolves dependencies significantly faster than pip and produces - a reproducible environment via `uv.lock`. Preferred for all development work. + [`uv`](https://docs.astral.sh/uv/) resolves dependencies significantly faster than pip and + produces a reproducible environment via `uv.lock`. Preferred for all development work. === ":simple-pypi: pip" @@ -36,15 +46,38 @@ changes are reflected immediately without reinstalling. cd zenzic python -m venv .venv source .venv/bin/activate # Windows: .venv\Scripts\activate - pip install -e . + pip install -e ".[docs]" + pip install pytest pytest-cov ruff mypy pre-commit reuse ``` +### Dependency groups + +Zenzic uses [PEP 735](https://peps.python.org/pep-0735/) dependency groups to keep CI fast +by installing only what each job needs. The groups are: + +| Group | Contents | When to use | +| :---- | :------- | :---------- | +| `test` | `pytest`, `pytest-cov` | Running the test suite | +| `lint` | `ruff`, `mypy`, `pre-commit`, `reuse` | Linting and type checking | +| `docs` | MkDocs stack (`mkdocs-material`, etc.) 
| Building the documentation | +| `release` | `nox`, `bump-my-version`, `pip-audit` | Releases and audits | +| `dev` | All of the above (aggregator) | Local development | + +Install a single group when you only need a subset: + +```bash +uv sync --group test # just pytest +uv sync --group lint # just ruff + mypy +uv sync --group docs # just the MkDocs stack +uv sync --group dev # everything (recommended for contributors) +``` + With an editable install, the `zenzic` binary on your `PATH` always runs the source you are working on. Validate the repository's own documentation at any time: ```bash -zenzic check all # all six checks +zenzic check all # all seven checks zenzic check references # includes custom [[custom_rules]] evaluation pytest # full test suite ``` diff --git a/docs/community/faqs.md b/docs/community/faqs.md index dc54c5b..561378b 100644 --- a/docs/community/faqs.md +++ b/docs/community/faqs.md @@ -19,10 +19,11 @@ below or use the search bar to find what you need. **What is Zenzic?** -Zenzic is a high-performance documentation linter for any Markdown-based project. It works -natively with MkDocs and Zensical, and supports any build engine via the adapter system. It -detects broken links, orphan pages, placeholder stubs, missing assets, and more — at source -level, before the build runs. +Zenzic is a high-performance documentation linter for any Markdown-based project. It operates +on raw source files — never the generated output — so it works with any build engine: MkDocs, +Zensical, or any other static site generator via the adapter system. It detects broken links, +orphan pages, placeholder stubs, unused assets, leaked credentials, and more — before the +build runs. **Is Zenzic free?** @@ -49,14 +50,16 @@ Or add it to your project with `uv add --dev zenzic` (recommended) or `pip insta **Do I need a `zenzic.toml` file?** -No. Zenzic works with zero configuration — the defaults cover most standard MkDocs projects. 
-`zenzic.toml` is only needed to customise behaviour, such as excluding specific directories, +No. Zenzic works with zero configuration — the defaults cover most standard projects out of the +box. `zenzic.toml` is only needed to customise behaviour, such as excluding specific directories, assets, or external URLs. **Can I use Zenzic with a non-MkDocs project?** -Currently Zenzic supports MkDocs and Zensical. Support for other engines is planned. -See the [Engines guide](../guides/engines.md) for details. +Yes. Zenzic works on any folder of Markdown files without requiring a build engine at all +(Vanilla mode). Native adapters for MkDocs and Zensical add nav-awareness and i18n support. +Third-party adapters can extend this to any other engine. See the [Engines guide](../guides/engines.md) +for details. --- @@ -70,14 +73,22 @@ useful for continuous monitoring and badges. **What is an "orphan page"?** -An orphan page is a Markdown file present in `docs/` but absent from the navigation (`nav:`) -in `mkdocs.yml`. Orphan pages are unreachable by users but add noise and confusion. -Zenzic reports them so you stay in control. +An orphan page is a Markdown file present in `docs/` but absent from the site navigation +declared in your build engine's configuration file. Orphan pages are unreachable by users +navigating the site structure. Zenzic reports them so you stay in control. **The external link check is slow. Can I disable it?** -You can exclude specific URLs with `excluded_external_urls` in `zenzic.toml`. To skip external -link checking entirely, use `zenzic check links --no-external`. +External link validation only runs when `--strict` is passed — omitting the flag disables all +network requests entirely. 
To permanently suppress specific URLs without removing strict mode, +add their prefixes to `excluded_external_urls` in `zenzic.toml`: + +```toml +excluded_external_urls = [ + "https://internal.company.com", + "https://github.com/MyOrg/private-repo", +] +``` **Does Zenzic check links in images too?** @@ -99,8 +110,14 @@ For the full setup with dynamic badges and regression detection, see the [CI/CD **What does the `--strict` flag do?** -In strict mode, any warning becomes an error. Recommended in CI pipelines to ensure no issue -slips through unnoticed. +The `--strict` flag has two effects depending on the command: + +- `zenzic check links --strict` / `zenzic check all --strict`: also validates external HTTP/HTTPS + links via network requests (disabled by default for speed). +- `zenzic check references --strict`: treats Dead Definitions (reference links defined but never + used) as hard errors instead of warnings. + +Recommended in CI pipelines to catch all classes of issues. **What is the Zenzic Shield (exit code 2)?** diff --git a/docs/community/index.md b/docs/community/index.md index 1e79b38..31db994 100644 --- a/docs/community/index.md +++ b/docs/community/index.md @@ -66,4 +66,30 @@ from the community of its users. Our open approach allows you to: [How to contribute](contribute/index.md){ .md-button .md-button--primary } +--- + +## Citing Zenzic + +If you use Zenzic in your research, documentation pipelines, or technical reports and would +like to cite it formally, a [`CITATION.cff`][citation-cff] file is present at the root of +the repository. Most research tools (GitHub, Zenodo, Zotero) can read this format directly. + +__GitHub__: click the __"Cite this repository"__ button on the [repository page][repo] — +GitHub automatically renders the `CITATION.cff` into APA and BibTeX. 
+ +__Manual BibTeX:__ + +```text +@software{zenzic, + author = {PythonWoods}, + title = {{Zenzic: The Agnostic Documentation Integrity Framework}}, + version = {0.4.0rc3}, + date = {2026-03-29}, + url = {https://zenzic.pythonwoods.dev/}, + license = {Apache-2.0}, +} +``` + + [citation-cff]: https://github.com/PythonWoods/zenzic/blob/main/CITATION.cff [issue tracker]: https://github.com/PythonWoods/zenzic/issues + [repo]: https://github.com/PythonWoods/zenzic diff --git a/docs/configuration/core-settings.md b/docs/configuration/core-settings.md index 21d2680..0669f1a 100644 --- a/docs/configuration/core-settings.md +++ b/docs/configuration/core-settings.md @@ -240,3 +240,46 @@ To run in observational mode regardless of what `zenzic.toml` contains: ```bash zenzic score --fail-under 0 ``` + +--- + +## `strict` + +**Type:** boolean — **Default:** `false` + +When `true`, every invocation of `zenzic check all`, `zenzic score`, and `zenzic diff` behaves +as if `--strict` were passed: external URLs are validated via network requests and warnings are +treated as errors. + +Use this to make strict mode the permanent default for a project, without adding `--strict` to +every CI command: + +```toml +strict = true +``` + +Passing `--strict` on the command line enables strict mode for that run, +regardless of what `zenzic.toml` contains. + +--- + +## `exit_zero` + +**Type:** boolean — **Default:** `false` + +When `true`, `zenzic check all` always exits with code `0` even when issues are found. All +findings are still printed and included in the quality score — only the exit code is suppressed. + +Use this during an active documentation improvement sprint when you want full visibility without +blocking the pipeline: + +```toml +exit_zero = true +``` + +The `--exit-zero` CLI flag overrides this value for a single run. + +!!! warning "Use sparingly" + Setting `exit_zero = true` in `zenzic.toml` disables the quality gate globally.
Prefer + using `--exit-zero` as a temporary CLI flag during cleanup sprints, and removing it once + the baseline is clean. diff --git a/docs/configuration/custom-rules-dsl.md b/docs/configuration/custom-rules-dsl.md index edf611c..e1727ec 100644 --- a/docs/configuration/custom-rules-dsl.md +++ b/docs/configuration/custom-rules-dsl.md @@ -58,6 +58,23 @@ surfaced in the report but do not affect the exit code unless `--strict` is pass --- +## Output format + +When a custom rule fires, Zenzic prints the finding with a visual snippet showing the offending +line: + +```text +[ZZ-NODRAFT] docs/guide/install.md:14 — Remove DRAFT marker before publishing. + │ > DRAFT: section under construction +``` + +For `"error"` severity the rule ID is printed in red; for `"warning"` in yellow. The `│` line +shows the raw source line exactly as it appears in the file. + +Rules with `severity = "info"` are printed without the `│` snippet. + +--- + ## Adapter-independence **Custom rules are adapter-independent.** A rule searching for `DRAFT` fires identically whether diff --git a/docs/developers/examples.md b/docs/developers/examples.md new file mode 100644 index 0000000..027da85 --- /dev/null +++ b/docs/developers/examples.md @@ -0,0 +1,141 @@ +--- +icon: lucide/folder-open +--- + + + + +# Example Projects + +The `examples/` directory at the repository root contains four self-contained +projects. Each is a runnable fixture: navigate into the directory and run +`zenzic check all` to see its output. + +```bash +git clone https://github.com/PythonWoods/zenzic +cd zenzic/examples/vanilla # or broken-docs, i18n-standard, security_lab +zenzic check all +``` + +--- + +## broken-docs — Intentional Failures Fixture + +**Purpose:** Trigger every Zenzic check at least once. Useful when debugging a +new check or verifying that an error message is correctly formatted. + +**Expected result:** `FAILED` — multiple check failures, exit code 1.
+ +| Check | What triggers it | +| --- | --- | +| Links | Missing file, dead anchor, path traversal, absolute path, broken i18n | +| Orphans | `api.md` exists on disk but is absent from the `nav` | +| Snippets | Python block with a `SyntaxError` (missing colon) | +| Placeholders | `api.md` has only 18 words and a bare task marker | +| Assets | `assets/unused.png` is on disk but never referenced | +| Custom rules | `ZZ-NOFIXME` pattern in `zenzic.toml` | + +```bash +cd examples/broken-docs +zenzic check all # exit 1 +zenzic check all --exit-zero # exit 0 (soft-gate mode) +``` + +Engine: `mkdocs`. Also ships a `zensical.toml` to demonstrate the same fixture +under the Zensical engine. + +--- + +## i18n-standard — Gold Standard Bilingual Project + +**Purpose:** Demonstrate a perfectly clean bilingual project that scores 100/100. +Use this as the reference template when starting a new multilingual docs project. + +**Expected result:** `SUCCESS` — all checks pass, score 100/100. + +Key patterns this example demonstrates: + +- **Suffix-mode i18n** — translations live as `page.it.md` siblings, never in a + `docs/it/` subtree +- **Path symmetry** — `../../assets/brand-kit.zip` resolves identically from + both `page.md` and `page.it.md` +- **Build artifact exclusion** — `excluded_build_artifacts` lets Zenzic validate + links to generated files without requiring them on disk +- **`fail_under = 100`** — any regression breaks the gate + +```bash +cd examples/i18n-standard +zenzic check all --strict # exit 0, score 100/100 +``` + +Engine: `mkdocs` with `i18n` plugin in `docs_structure: suffix` mode. + +--- + +## security_lab — Zenzic Shield Test Fixture + +**Purpose:** Exercise the Shield subsystem — credential detection and path +traversal classification — before releases. + +**Expected result:** `FAILED` — exit code 2 (Shield event; non-suppressible). 
+ +| File | What it triggers | +| --- | --- | +| `traversal.md` | `PathTraversal`: `../../etc/passwd` escapes `docs/` | +| `attack.md` | `PathTraversal` + seven fake credential patterns (all Shield families) | +| `absolute.md` | Absolute paths (`/assets/logo.png`, `/etc/passwd`) | +| `fenced.md` | Fake credentials inside unlabelled and `bash` fenced blocks | + +```bash +cd examples/security_lab +zenzic check links --strict # exit 1 (path traversal) +zenzic check references # exit 2 (Shield: fake credentials) +zenzic check all # exit 2 (Shield takes priority) +``` + +> The credentials in `attack.md` and `fenced.md` are entirely synthetic — they +> match the regex shape but are not valid tokens for any service. + +Engine: `mkdocs`. + +--- + +## vanilla — Engine-Agnostic Quality Gate + +**Purpose:** Show Zenzic running without any build engine. No `mkdocs.yml`, +no `zensical.toml`, no Hugo config. Just `engine = "vanilla"` in `zenzic.toml`. + +**Expected result:** `SUCCESS` — all applicable checks pass. + +What works in Vanilla mode: + +- Links, snippets, placeholders, and assets are fully checked +- `[[custom_rules]]` fire identically to any other mode +- `fail_under` enforces a minimum quality score +- The **orphan check is skipped** — with no declared nav there is no reference set + +```bash +cd examples/vanilla +zenzic check all # exit 0 +``` + +Use Vanilla mode for Hugo, Docusaurus, Sphinx, Astro, Jekyll, GitHub wikis, +or any project that does not use MkDocs or Zensical. + +--- + +## Running the full examples suite + +From the repository root, verify all examples produce their expected exit codes: + +```bash +# Gold standard and vanilla: must be clean +(cd examples/i18n-standard && zenzic check all --strict) +(cd examples/vanilla && zenzic check all) + +# Broken: must fail with exit 1 +(cd examples/broken-docs && zenzic check all); [ $? -eq 1 ] + +# Security lab: must exit with code 2 (Shield) +(cd examples/security_lab && zenzic check all); [ $? 
-eq 2 ] +``` diff --git a/docs/developers/writing-an-adapter.md b/docs/developers/writing-an-adapter.md index 328b911..c1ad6d1 100644 --- a/docs/developers/writing-an-adapter.md +++ b/docs/developers/writing-an-adapter.md @@ -17,15 +17,17 @@ An **adapter** is a Python class that satisfies the `BaseAdapter` protocol scanner, orphan detector, and link validator talk exclusively to this protocol — they never import or call engine-specific code directly. -An adapter answers four questions for each docs tree: +An adapter answers seven questions for each docs tree: | Method | Question | |---|---| | `is_locale_dir(part)` | Is this top-level directory a non-default locale? | | `resolve_asset(missing_abs, docs_root)` | Does a default-locale fallback exist for this missing asset? | +| `resolve_anchor(resolved_file, anchor, anchors_cache, docs_root)` | Should this anchor miss be suppressed because the anchor exists in the default-locale equivalent? | | `is_shadow_of_nav_page(rel, nav_paths)` | Is this file a locale mirror of a nav-listed page? | | `get_ignored_patterns()` | Which filename globs should the orphan check skip? | | `get_nav_paths()` | Which `.md` paths are listed in this engine's nav config? | +| `has_engine_config()` | Was a build-engine config file found on disk? (Controls orphan check activation.) | --- @@ -70,6 +72,34 @@ class MyEngineAdapter: """ return None + def resolve_anchor( + self, + resolved_file: Path, + anchor: str, + anchors_cache: dict[Path, set[str]], + docs_root: Path, + ) -> bool: + """Return True if an anchor miss on a locale file should be suppressed. + + Called when a link points to a heading anchor that exists in the + default-locale file but not in the locale translation (because + headings are translated). Return True to suppress the false positive. + + If your engine does not support i18n, always return False. + """ + return False + + def has_engine_config(self) -> bool: + """Return True when a build-engine config was found and loaded. 
+ + When False, the orphan check is skipped — with no nav information + there is no reference set to compare the file list against. + + Return True if your adapter successfully loaded a config file. + Return False only if no engine config exists (bare/vanilla mode). + """ + return bool(self._config) + def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: """Return True when *rel* is a locale mirror of a nav-listed page. @@ -199,6 +229,9 @@ incorrect results: 4. All methods must be **pure**: same inputs always produce the same outputs. No I/O, no global-state mutation. 5. `resolve_asset()` must never raise — return `None` on any failure. +6. `resolve_anchor()` must never raise — return `False` on any failure. + The `anchors_cache` argument is read-only; do not mutate it. +7. `has_engine_config()` must never raise — return `False` on any failure. --- diff --git a/docs/docs/index.md b/docs/guide/index.md similarity index 93% rename from docs/docs/index.md rename to docs/guide/index.md index 31164bd..722b29b 100644 --- a/docs/docs/index.md +++ b/docs/guide/index.md @@ -9,7 +9,7 @@ hide: # Documentation -Zenzic is a **CI-first documentation linter** for MkDocs and Zensical sites. It analyses raw Markdown source files — never the generated HTML — and catches documentation rot before it reaches your users. +Zenzic is a **CI-first documentation linter** for any Markdown-based project. It analyses raw source files — never the generated HTML — and catches documentation rot before it reaches your users. !!! tip "Zero install — run it now" diff --git a/docs/index.md b/docs/index.md index 6f03799..952ecbe 100644 --- a/docs/index.md +++ b/docs/index.md @@ -19,7 +19,7 @@ High-performance documentation linter for any Markdown-based project. Catch broken links, orphan pages, and leaked credentials — before your users do. 
{: .zz-hero__tagline } -[Get started](docs/index.md){ .md-button .md-button--primary } +[Get started](guide/index.md){ .md-button .md-button--primary } [View on GitHub](https://github.com/PythonWoods/zenzic){ .md-button } {: .zz-hero__actions } diff --git a/docs/it/checks.md b/docs/it/checks.md index 33492f6..7ab712e 100644 --- a/docs/it/checks.md +++ b/docs/it/checks.md @@ -39,13 +39,13 @@ Zenzic esegue sei controlli indipendenti. Ognuno affronta una categoria distinta File media presenti su disco ma mai referenziati. __Supporta l'autofix.__ - [`check assets`](#asset)  •  [`clean assets`](usage/index.md#autofix-cleanup) + [`check assets`](#asset)  •  [`clean assets`](usage/commands.md#autofix-cleanup) - :lucide-shield-check:   __Riferimenti__ Riferimenti pendenti, definizioni morte e credenziali trapelate (exit code 2). - [`zenzic check references`](usage/index.md#integrita-dei-riferimenti-v020) + [`zenzic check references`](usage/advanced.md#integrita-dei-riferimenti-v020) @@ -138,32 +138,47 @@ ORPHANS (2): __CLI:__ `zenzic check snippets` -Gli esempi di codice nella documentazione vengono testati meno rigorosamente del codice in produzione. Un snippet che funzionava quando è stato scritto potrebbe avere un errore di sintassi introdotto da un refactoring. +Gli esempi di codice nella documentazione vengono testati meno rigorosamente del codice in produzione. Uno snippet che funzionava quando è stato scritto potrebbe avere un errore di sintassi introdotto da un refactoring, un errore di copia-incolla o una modifica manuale mai revisionata. I lettori che copiano codice rotto perdono tempo a fare debug di errori che non c'entrano nulla con il loro problema reale. -`zenzic check snippets` estrae tutti i blocchi di codice delimitati da `` ```python `` o `` ```py `` e compila ognuno con il `compile()` integrato di Python in modalità `exec`. Viene controllata solo la sintassi — gli errori runtime non vengono rilevati.
+`zenzic check snippets` valida la sintassi dei blocchi di codice delimitati usando parser puri in Python — nessun sottoprocesso viene avviato per nessun linguaggio. + +__Linguaggi supportati:__ + +| Tag linguaggio | Parser | Cosa viene controllato | +| :--- | :--- | :--- | +| `` python ``, `` py `` | `compile()` in modalità `exec` | Sintassi Python 3.11+ | +| `` yaml ``, `` yml `` | `yaml.safe_load()` | Struttura YAML 1.1 | +| `` json `` | `json.loads()` | Sintassi JSON | +| `` toml `` | `tomllib.loads()` (stdlib 3.11+) | Sintassi TOML v1.0 | + +I blocchi con qualsiasi altro tag (`` bash ``, `` javascript ``, `` mermaid ``, ecc.) vengono trattati come testo semplice e non vengono controllati sintatticamente. Tuttavia, __ogni blocco delimitato viene comunque scansionato dallo Zenzic Shield__ per i pattern di credenziali — la validazione sintattica e la scansione di sicurezza sono indipendenti. __Comportamento CLI:__ percorre `docs_dir`, legge ogni file `.md` e chiama `check_snippet_content(text, file_path, config)` sul contenuto grezzo. -__Estrazione dei blocchi:__ Zenzic usa una macchina a stati deterministica riga per riga invece di una regex per estrarre i blocchi Python. Questo previene falsi positivi dagli inline code span (es., `` ` ```python ` `` nel testo) ed è robusto rispetto ai documenti `pymdownx.superfences` con fence Mermaid o altri fence personalizzati intercalati. Vedi [Architettura — Parsing a macchina a stati](architecture.md#parsing-a-macchina-a-stati-e-falsi-positivi-da-superfences) per i dettagli. +__Estrazione dei blocchi:__ Zenzic usa una macchina a stati deterministica riga per riga invece di una regex per estrarre i blocchi di codice. Questo previene falsi positivi dagli inline code span (es., `` ` ```python ` `` nel testo) ed è robusto rispetto ai documenti `pymdownx.superfences` con fence Mermaid o altri fence personalizzati intercalati. 
Vedi [Architettura — Parsing a macchina a stati](architecture.md#parsing-a-macchina-a-stati-e-falsi-positivi-da-superfences) per i dettagli. __Cosa rileva:__ -- `SyntaxError` — due punti mancanti, parentesi non bilanciate, espressioni non valide -- Crash del parser — `MemoryError`, `RecursionError` da input patologici +- Python: `SyntaxError` — due punti mancanti, parentesi non bilanciate, espressioni non valide; crash del parser (`MemoryError`, `RecursionError`) +- YAML: errori strutturali — sequenze non chiuse, mapping non validi, chiavi duplicate +- JSON: `JSONDecodeError` — virgole finali, virgolette mancanti, parentesi non bilanciate +- TOML: `TOMLDecodeError` — virgolette mancanti sui valori, sintassi chiave non valida, mismatch di tipo __Cosa NON rileva:__ - Errori runtime (`NameError`, `TypeError`, `ImportError`, ecc.) — viene controllata solo la sintassi - Snippet intenzionalmente incompleti — frammenti, stub con ellissi, pseudo-codice +- Bash, JavaScript o qualsiasi altro linguaggio senza un parser puro in Python __Tuning:__ usa `snippet_min_lines` in `zenzic.toml` per saltare i blocchi brevi. Il default di `1` controlla tutto inclusi i blocchi su una singola riga. Impostalo a `3` o superiore per ignorare stub di import e one-liner che sono probabilmente illustrativi piuttosto che eseguibili. 
__Output di esempio:__ ```text -INVALID SNIPPETS (2): - tutorial.md:48 - SyntaxError in Python snippet — invalid syntax - api/reference.md:112 - SyntaxError in Python snippet — expected ':' +INVALID SNIPPETS (3): + tutorial.md:48 - SyntaxError in Python snippet — expected ':' + config/reference.md:22 - SyntaxError in YAML snippet — mapping values are not allowed here + api/reference.md:112 - SyntaxError in JSON snippet — Expecting property name enclosed in double quotes ``` --- @@ -186,7 +201,7 @@ __Comportamento CLI:__ legge ogni file `.md` e chiama `check_placeholder_content __Tuning:__ -```toml +```text # zenzic.toml # Alza la soglia per progetti con pagine dense e concise diff --git a/docs/it/community/contribute/pull-requests.md b/docs/it/community/contribute/pull-requests.md index 4d8ea0c..00e2025 100644 --- a/docs/it/community/contribute/pull-requests.md +++ b/docs/it/community/contribute/pull-requests.md @@ -13,21 +13,31 @@ di lavoro e a garantire l'integrità del codebase. ## Setup per lo sviluppo locale -Clona il repository e installa Zenzic in modalità editabile in modo che le -modifiche locali si riflettano immediatamente senza reinstallare. +Clona il repository e configura l'intero ambiente di sviluppo in un unico passo: + +```bash +git clone https://github.com/PythonWoods/zenzic.git +cd zenzic +nox -s dev +``` + +`nox -s dev` esegue `uv sync --group dev` (installando tutti i gruppi di dipendenze — test, +lint, docs e release tooling) e poi installa gli hook pre-commit. È il comando di setup +canonico one-shot; eseguilo una volta dopo il clone. + +Per un setup più granulare o se non hai ancora `nox` installato, usa direttamente `uv`: === ":simple-astral: uv (raccomandato)" ```bash git clone https://github.com/PythonWoods/zenzic.git cd zenzic - uv venv + uv sync --group dev source .venv/bin/activate # Windows: .venv\Scripts\activate - uv pip install -e . 
``` - [`uv`](https://docs.astral.sh/uv/) risolve le dipendenze molto più velocemente di pip e produce un - ambiente riproducibile tramite `uv.lock`. Preferito per tutto il lavoro di + [`uv`](https://docs.astral.sh/uv/) risolve le dipendenze molto più velocemente di pip e + produce un ambiente riproducibile tramite `uv.lock`. Preferito per tutto il lavoro di sviluppo. === ":simple-pypi: pip" @@ -37,15 +47,38 @@ modifiche locali si riflettano immediatamente senza reinstallare. cd zenzic python -m venv .venv source .venv/bin/activate # Windows: .venv\Scripts\activate - pip install -e . + pip install -e ".[docs]" + pip install pytest pytest-cov ruff mypy pre-commit reuse ``` +### Gruppi di dipendenze + +Zenzic usa i [gruppi di dipendenze PEP 735](https://peps.python.org/pep-0735/) per mantenere +la CI veloce installando solo ciò di cui ogni job ha bisogno. I gruppi sono: + +| Gruppo | Contenuto | Quando usarlo | +| :----- | :-------- | :------------ | +| `test` | `pytest`, `pytest-cov` | Esecuzione della suite di test | +| `lint` | `ruff`, `mypy`, `pre-commit`, `reuse` | Linting e type checking | +| `docs` | Stack MkDocs (`mkdocs-material`, ecc.) | Build della documentazione | +| `release` | `nox`, `bump-my-version`, `pip-audit` | Rilasci e audit | +| `dev` | Tutti i precedenti (aggregatore) | Sviluppo locale | + +Installa un singolo gruppo quando hai bisogno solo di un sottoinsieme: + +```bash +uv sync --group test # solo pytest +uv sync --group lint # solo ruff + mypy +uv sync --group docs # solo lo stack MkDocs +uv sync --group dev # tutto (raccomandato per i contributori) +``` + Con un'installazione editabile, il binario `zenzic` nel tuo `PATH` esegue sempre il sorgente su cui stai lavorando. 
Puoi validare la documentazione del repository in qualsiasi momento: ```bash -zenzic check all # tutti e sei i controlli +zenzic check all # tutti e sette i controlli zenzic check references # include la valutazione delle [[custom_rules]] pytest # suite di test completa ``` diff --git a/docs/it/community/faqs.md b/docs/it/community/faqs.md index 9b9a683..7506642 100644 --- a/docs/it/community/faqs.md +++ b/docs/it/community/faqs.md @@ -19,9 +19,11 @@ qui sotto o usa la barra di ricerca per trovare ciò di cui hai bisogno. **Cos'è Zenzic?** -Zenzic è un linter di documentazione di livello ingegneristico per siti MkDocs e Zensical. Rileva -link interrotti, pagine orfane, stub placeholder, asset mancanti e altro ancora — a livello sorgente, -prima che venga eseguita la build. +Zenzic è un linter di documentazione di livello ingegneristico per qualsiasi progetto basato su +Markdown. Opera sui file sorgente grezzi — mai sull'output generato — quindi funziona con qualsiasi +motore di build: MkDocs, Zensical o qualsiasi altro generatore di siti statici tramite il sistema +di adapter. Rileva link interrotti, pagine orfane, stub placeholder, asset non utilizzati, +credenziali esposte e altro ancora — prima che venga eseguita la build. **Zenzic è gratuito?** @@ -47,13 +49,15 @@ Oppure installalo nel progetto con `uv add --dev zenzic` (raccomandato) o `pip i **Devo creare un file `zenzic.toml`?** No. Zenzic funziona con configurazione zero — i valori predefiniti coprono la maggior parte dei -progetti MkDocs standard. Il file `zenzic.toml` è necessario solo per personalizzare il comportamento, -ad esempio per escludere directory, asset o URL esterni specifici. +progetti standard senza alcuna configurazione aggiuntiva. Il file `zenzic.toml` è necessario solo +per personalizzare il comportamento, ad esempio per escludere directory, asset o URL esterni specifici. **Posso usare Zenzic con un progetto non-MkDocs?** -Attualmente Zenzic supporta MkDocs e Zensical. 
Il supporto per altri engine è pianificato. -Consulta la guida [Motori](../guides/engines.md) per i dettagli. +Sì. Zenzic funziona su qualsiasi cartella di file Markdown senza richiedere alcun motore di +build (modalità Vanilla). Gli adapter nativi per MkDocs e Zensical aggiungono la consapevolezza +della nav e il supporto i18n. Adapter di terze parti possono estendere questa funzionalità a +qualsiasi altro motore. Consulta la guida [Motori](../guides/engines.md) per i dettagli. ## Checks e risultati @@ -66,13 +70,23 @@ utile per il monitoraggio continuo e i badge. **Cosa significa "orphan page"?** Una pagina orfana è un file Markdown presente nella directory `docs/` ma assente dalla -navigazione (`nav:`) in `mkdocs.yml`. Le pagine orfane non sono raggiungibili dagli utenti -ma occupano spazio e creano confusione. Zenzic le segnala per tenerti in controllo. +navigazione del sito dichiarata nel file di configurazione del motore di build. Le pagine +orfane non sono raggiungibili dagli utenti che navigano la struttura del sito. Zenzic le +segnala per tenerti in controllo. **Il check dei link esterni è lento. Posso disabilitarlo?** -Puoi escludere URL specifici con `excluded_external_urls` in `zenzic.toml`. Per saltare -completamente il controllo dei link esterni usa `zenzic check links --no-external`. +La verifica dei link esterni viene eseguita solo quando viene passato il flag `--strict` — +omettere il flag disabilita completamente tutte le richieste di rete. Per escludere +permanentemente URL specifici senza rinunciare alla modalità strict, aggiungi i loro prefissi +a `excluded_external_urls` in `zenzic.toml`: + +```toml +excluded_external_urls = [ + "https://internal.company.com", + "https://github.com/MyOrg/private-repo", +] +``` **Zenzic controlla anche i link nelle immagini?** @@ -93,8 +107,14 @@ guida [CI/CD](../ci-cd.md). **Cosa fa il flag `--strict`?** -In modalità strict, qualsiasi warning diventa un errore. 
È consigliato nelle pipeline CI -per garantire che nessuna issue passi inosservata. +Il flag `--strict` ha due effetti a seconda del comando: + +- `zenzic check links --strict` / `zenzic check all --strict`: abilita anche la verifica + dei link HTTP/HTTPS esterni tramite richieste di rete (disabilitata per default per velocità). +- `zenzic check references --strict`: tratta le Dead Definitions (link di riferimento definiti + ma mai usati) come errori bloccanti anziché warning. + +Consigliato nelle pipeline CI per intercettare tutte le categorie di problemi. **Cos'è il Zenzic Shield (exit code 2)?** diff --git a/docs/it/community/index.md b/docs/it/community/index.md index a562fda..612ee00 100644 --- a/docs/it/community/index.md +++ b/docs/it/community/index.md @@ -55,4 +55,31 @@ In quanto software Open Source, Zenzic beneficia di diversi tipi di contributi d [Come contribuire](contribute/index.md){ .md-button .md-button--primary } +--- + +## Citare Zenzic + +Se utilizzi Zenzic nella tua ricerca, nelle pipeline di documentazione o in report tecnici e +desideri citarlo formalmente, il file [`CITATION.cff`][citation-cff] è presente nella root del +repository. La maggior parte degli strumenti di ricerca (GitHub, Zenodo, Zotero) può leggere +questo formato direttamente. + +__GitHub__: clicca il pulsante __"Cite this repository"__ sulla [pagina del repository][repo] — +GitHub trasforma automaticamente il `CITATION.cff` in formato APA e BibTeX. 
+ +__BibTeX manuale:__ + +```text +@software{zenzic, + author = {PythonWoods}, + title = {{Zenzic: The Agnostic Documentation Integrity Framework}}, + version = {0.4.0rc3}, + date = {2026-03-29}, + url = {https://zenzic.pythonwoods.dev/}, + license = {Apache-2.0}, +} +``` + + [citation-cff]: https://github.com/PythonWoods/zenzic/blob/main/CITATION.cff [issue tracker]: https://github.com/PythonWoods/zenzic/issues + [repo]: https://github.com/PythonWoods/zenzic diff --git a/docs/it/configuration/core-settings.md b/docs/it/configuration/core-settings.md index 2ae2081..80c23d6 100644 --- a/docs/it/configuration/core-settings.md +++ b/docs/it/configuration/core-settings.md @@ -166,8 +166,52 @@ validate_same_page_anchors = true **Tipo:** intero — **Default:** `0` Punteggio di qualità minimo (0–100). Se `zenzic score` produce un punteggio inferiore a questo -valore, il comando termina con codice di uscita 1. +valore, il comando termina con codice di uscita 1. Il flag CLI `--fail-under` sovrascrive questo +valore per una singola esecuzione. ```toml fail_under = 80 ``` + +--- + +## `strict` + +**Tipo:** booleano — **Default:** `false` + +Quando `true`, ogni invocazione di `zenzic check all`, `zenzic score` e `zenzic diff` si comporta +come se fosse passato `--strict`: gli URL esterni vengono validati via rete e i warning vengono +trattati come errori. + +Usa questo campo per rendere la modalità strict il default permanente per un progetto, senza +dover aggiungere `--strict` a ogni comando CI: + +```toml +strict = true +``` + +Il flag CLI `--strict` sovrascrive questo valore per una singola esecuzione. + +--- + +## `exit_zero` + +**Tipo:** booleano — **Default:** `false` + +Quando `true`, `zenzic check all` termina sempre con codice `0` anche quando vengono trovati +problemi. Tutti i risultati vengono comunque stampati e inclusi nel punteggio qualità — viene +soppressa solo l'uscita non-zero. 
+ +Usa questo campo durante uno sprint attivo di miglioramento della documentazione per ottenere +visibilità completa senza bloccare la pipeline: + +```toml +exit_zero = true +``` + +Il flag CLI `--exit-zero` sovrascrive questo valore per una singola esecuzione. + +!!! warning "Usa con cautela" + Impostare `exit_zero = true` in `zenzic.toml` disabilita il quality gate globalmente. + Preferisci usare `--exit-zero` come flag CLI temporaneo durante gli sprint di cleanup, + rimuovendolo una volta che il baseline è pulito. diff --git a/docs/it/configuration/custom-rules-dsl.md b/docs/it/configuration/custom-rules-dsl.md index 66aec16..56615e2 100644 --- a/docs/it/configuration/custom-rules-dsl.md +++ b/docs/it/configuration/custom-rules-dsl.md @@ -54,6 +54,23 @@ Ogni header `[[custom_rules]]` aggiunge una regola alla lista (sintassi TOML arr --- +## Formato dell'output + +Quando una regola custom scatta, Zenzic mostra il problema con uno snippet visivo della riga +incriminata: + +```text +[ZZ-NODRAFT] docs/guide/install.md:14 — Rimuovere il marker DRAFT prima della pubblicazione. + │ > DRAFT: sezione in costruzione +``` + +Per la severità `"error"` l'ID della regola viene mostrato in rosso; per `"warning"` in giallo. +La riga `│` mostra la riga sorgente esattamente come appare nel file. + +Le regole con `severity = "info"` vengono stampate senza lo snippet `│`. + +--- + ## Indipendenza dall'adapter **Le regole custom sono indipendenti dall'adapter.** Una regola che cerca `DRAFT` si attiva diff --git a/docs/it/developers/examples.md b/docs/it/developers/examples.md new file mode 100644 index 0000000..406749f --- /dev/null +++ b/docs/it/developers/examples.md @@ -0,0 +1,143 @@ +--- +icon: lucide/folder-open +--- + + + + +# Progetti di Esempio + +La directory `examples/` alla root del repository contiene quattro progetti +auto-contenuti. Ognuno è un fixture eseguibile: naviga nella directory e lancia +`zenzic check all` per vederne l'output. 
+ +```bash +git clone https://github.com/PythonWoods/zenzic +cd zenzic/examples/<nome> +zenzic check all +``` + +--- + +## broken-docs — Fixture di Fallimenti Intenzionali + +**Scopo:** Attivare ogni controllo Zenzic almeno una volta. Utile per testare un +nuovo controllo o verificare che un messaggio di errore sia formattato correttamente. + +**Risultato atteso:** `FAILED` — molteplici fallimenti, codice di uscita 1. + +| Controllo | Cosa lo attiva | +| --- | --- | +| Link | File mancante, ancora morta, path traversal, percorso assoluto, i18n rotto | +| Orfani | `api.md` esiste su disco ma è assente dalla `nav` | +| Snippet | Blocco Python con `SyntaxError` (due punti mancanti) | +| Placeholder | `api.md` ha solo 18 parole e un marcatore di task | +| Asset | `assets/unused.png` è su disco ma mai referenziato | +| Regole custom | Pattern `ZZ-NOFIXME` in `zenzic.toml` | + +```bash +cd examples/broken-docs +zenzic check all # uscita 1 +zenzic check all --exit-zero # uscita 0 (modalità soft-gate) +``` + +Motore: `mkdocs`. Contiene anche un `zensical.toml` per dimostrare lo stesso +fixture con il motore Zensical. + +--- + +## i18n-standard — Progetto Bilingue Gold Standard + +**Scopo:** Dimostrare un progetto bilingue perfettamente pulito con punteggio 100/100. +Usa questo come template di riferimento per un nuovo progetto multilingua. + +**Risultato atteso:** `SUCCESS` — tutti i controlli passano, punteggio 100/100. 
+ +Pattern chiave dimostrati: + +- **Suffix-mode i18n** — le traduzioni vivono come file `pagina.it.md` nella stessa + cartella, mai in un sottoalbero `docs/it/` +- **Simmetria dei percorsi** — `../../assets/brand-kit.zip` si risolve identicamente + da `pagina.md` e `pagina.it.md` +- **Esclusione build artifact** — `excluded_build_artifacts` permette a Zenzic di + validare i link a file generati senza che siano presenti su disco +- **`fail_under = 100`** — qualsiasi regressione rompe il gate + +```bash +cd examples/i18n-standard +zenzic check all --strict # uscita 0, punteggio 100/100 +``` + +Motore: `mkdocs` con plugin `i18n` in modalità `docs_structure: suffix`. + +--- + +## security_lab — Fixture di Test per lo Shield + +**Scopo:** Verificare il sottosistema Shield — rilevamento credenziali e +classificazione del path traversal — prima dei rilasci. + +**Risultato atteso:** `FAILED` — codice di uscita 2 (evento Shield; non sopprimibile). + +| File | Cosa attiva | +| --- | --- | +| `traversal.md` | `PathTraversal`: `../../etc/passwd` sfugge a `docs/` | +| `attack.md` | `PathTraversal` + sette pattern fake di credenziali (tutte le famiglie Shield) | +| `absolute.md` | Percorsi assoluti (`/assets/logo.png`, `/etc/passwd`) | +| `fenced.md` | Credenziali fake dentro blocchi delimitati senza etichetta e con etichetta `bash` | + +```bash +cd examples/security_lab +zenzic check links --strict # uscita 1 (path traversal) +zenzic check references # uscita 2 (Shield: credenziali fake) +zenzic check all # uscita 2 (Shield ha la priorità) +``` + +> Le credenziali in `attack.md` e `fenced.md` sono completamente sintetiche — +> corrispondono alla forma regex ma non sono token validi per nessun servizio. + +Motore: `mkdocs`. + +--- + +## vanilla — Gate di Qualità Agnostico rispetto all'Engine + +**Scopo:** Mostrare Zenzic in esecuzione senza nessun motore di build. Nessun +`mkdocs.yml`, nessun `zensical.toml`, nessuna config Hugo. Solo +`engine = "vanilla"` in `zenzic.toml`. 
+ +**Risultato atteso:** `SUCCESS` — tutti i controlli applicabili passano. + +Cosa funziona in modalità Vanilla: + +- Link, snippet, placeholder e asset sono verificati completamente +- Le `[[custom_rules]]` si attivano identicamente a qualsiasi altra modalità +- `fail_under` impone un punteggio di qualità minimo +- Il **controllo orfani è saltato** — senza nav dichiarata non esiste un insieme di riferimento + +```bash +cd examples/vanilla +zenzic check all # uscita 0 +``` + +Usa la modalità Vanilla per Hugo, Docusaurus, Sphinx, Astro, Jekyll, wiki GitHub +o qualsiasi progetto che non usa MkDocs o Zensical. + +--- + +## Eseguire la suite completa degli esempi + +Dalla root del repository, verifica che tutti gli esempi producano i codici di +uscita attesi: + +```bash +# Gold standard e vanilla: devono essere puliti +(cd examples/i18n-standard && zenzic check all --strict) +(cd examples/vanilla && zenzic check all) + +# Broken: deve fallire con uscita 1 +(cd examples/broken-docs && zenzic check all); [ $? -eq 1 ] + +# Security lab: deve uscire con codice 2 (Shield) +(cd examples/security_lab && zenzic check all); [ $? -eq 2 ] +``` diff --git a/docs/it/developers/writing-an-adapter.md b/docs/it/developers/writing-an-adapter.md index 775ab43..85b5626 100644 --- a/docs/it/developers/writing-an-adapter.md +++ b/docs/it/developers/writing-an-adapter.md @@ -16,30 +16,66 @@ terze parti. Per la guida completa in inglese, consulta ## Il protocollo `BaseAdapter` Ogni adapter deve implementare il protocollo `BaseAdapter` -(`src/zenzic/core/adapters/_base.py`). Le cinque funzioni richieste: +(`src/zenzic/core/adapters/_base.py`). I sette metodi richiesti: + +| Metodo | Domanda | +|---|---| +| `is_locale_dir(part)` | Questa directory è una locale non-default? | +| `resolve_asset(missing_abs, docs_root)` | Esiste un fallback default-locale per questo asset mancante? 
| +| `resolve_anchor(resolved_file, anchor, anchors_cache, docs_root)` | Questo anchor miss deve essere soppresso perché l'ancora esiste nel file default-locale equivalente? | +| `is_shadow_of_nav_page(rel, nav_paths)` | Questo file è il mirror locale di una pagina nella nav? | +| `get_ignored_patterns()` | Quali glob di filename deve saltare il controllo orfani? | +| `get_nav_paths()` | Quali percorsi `.md` sono dichiarati nella nav di questo motore? | +| `has_engine_config()` | È stato trovato un file di config del motore? (Controlla l'attivazione del controllo orfani.) | ```python +from pathlib import Path +from typing import Any + + class MyEngineAdapter: - def nav_paths(self) -> list[str]: - """Restituisce tutti i percorsi file dichiarati nella nav, relativi a docs_dir.""" - ... + def __init__(self, config: dict[str, Any], docs_root: Path) -> None: + self._config = config + self._docs_root = docs_root + + def is_locale_dir(self, part: str) -> bool: + return part in self._config.get("locales", []) + + def resolve_asset(self, missing_abs: Path, docs_root: Path) -> Path | None: + return None # nessun fallback i18n + + def resolve_anchor( + self, resolved_file: Path, anchor: str, + anchors_cache: dict[Path, set[str]], docs_root: Path, + ) -> bool: + return False # nessun fallback i18n per le ancore + + def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: + if not rel.parts or not self.is_locale_dir(rel.parts[0]): + return False + return Path(*rel.parts[1:]).as_posix() in nav_paths - def locale_dirs(self) -> list[str]: - """Restituisce i nomi delle directory locale non-default (es. ['it', 'fr']).""" - ... + def get_ignored_patterns(self) -> set[str]: + return set() - def asset_fallback(self, path: str, locale: str) -> str: - """Risolve un path asset relativo a una pagina locale nel path canonico.""" - ... 
+ def get_nav_paths(self) -> frozenset[str]: + paths = {e for e in self._config.get("nav", []) + if isinstance(e, str) and e.endswith(".md")} + return frozenset(p.lstrip("/") for p in paths) def has_engine_config(self) -> bool: - """Restituisce True quando è stato trovato e caricato un file di config del motore.""" - ... + return bool(self._config) @classmethod - def from_repo(cls, context, docs_root, repo_root) -> "MyEngineAdapter": - """Fabbrica che carica la configurazione dalla root del repository.""" - ... + def from_repo(cls, context: Any, docs_root: Path, repo_root: Path) -> "MyEngineAdapter": + """Carica la configurazione dalla root del repository.""" + import tomllib + config_path = repo_root / "myengine.toml" + config: dict[str, Any] = {} + if config_path.exists(): + with config_path.open("rb") as f: + config = tomllib.load(f) + return cls(config, docs_root) ``` --- diff --git a/docs/it/docs/index.md b/docs/it/guide/index.md similarity index 92% rename from docs/it/docs/index.md rename to docs/it/guide/index.md index 2c7bd06..50cdb95 100644 --- a/docs/it/docs/index.md +++ b/docs/it/guide/index.md @@ -9,7 +9,7 @@ hide: # Documentazione -Zenzic è un **linter di documentazione CI-first** per siti MkDocs e Zensical. Analizza file Markdown sorgente grezzi — mai l'HTML generato — e intercetta il degrado della documentazione prima che raggiunga i tuoi utenti. +Zenzic è un **linter di documentazione CI-first** per qualsiasi progetto basato su Markdown. Analizza i file sorgente grezzi — mai l'HTML generato — e intercetta il degrado della documentazione prima che raggiunga i tuoi utenti. !!! tip "Zero installazione — eseguilo ora" diff --git a/docs/it/index.md b/docs/it/index.md index 347f277..8906403 100644 --- a/docs/it/index.md +++ b/docs/it/index.md @@ -19,7 +19,7 @@ Linter di documentazione ad alte prestazioni per qualsiasi progetto Markdown. Intercetta link non validi, pagine orfane e credenziali esposte — prima dei tuoi utenti. 
{: .zz-hero__tagline } -[Inizia](docs/index.md){ .md-button .md-button--primary } +[Inizia](guide/index.md){ .md-button .md-button--primary } [Vedi su GitHub](https://github.com/PythonWoods/zenzic){ .md-button } {: .zz-hero__actions } diff --git a/docs/it/usage/advanced.md b/docs/it/usage/advanced.md new file mode 100644 index 0000000..ab8525e --- /dev/null +++ b/docs/it/usage/advanced.md @@ -0,0 +1,263 @@ +--- +icon: lucide/shield-check +--- + + + + +# Funzionalità avanzate + +Riferimento approfondito sulla Three-Pass Pipeline, Zenzic Shield, controlli di accessibilità +e utilizzo programmatico da Python. + +--- + +## Integrità dei riferimenti (v0.2.0) + +`zenzic check references` esegue la **Three-Pass Reference Pipeline** — il motore alla base di +ogni controllo di qualità e sicurezza sui riferimenti. + +### Perché tre pass? + +I [reference-style link][syntax] Markdown separano *dove punta un link* (la definizione) da +*dove appare* (l'utilizzo). Uno scanner single-pass non può risolvere un riferimento che +appare prima della sua definizione. Zenzic risolve questo con una struttura deliberata a tre pass: + +| Pass | Nome | Cosa succede | +| :---: | :--- | :--- | +| 1 | **Harvest** | Legge il file riga per riga; registra tutte le definizioni `[id]: url` in una `ReferenceMap`; esegue lo Shield su ogni URL e riga | +| 2 | **Cross-Check** | Riscorre il file; per ogni utilizzo `[testo][id]`, cerca `id` nella `ReferenceMap` ora completa; segnala gli ID mancanti come **Dangling Reference** | +| 3 | **Integrity Report** | Calcola il punteggio di integrità; aggiunge le **Dead Definition**, i warning per ID duplicati e per alt-text mancanti | + +Il Pass 2 inizia solo quando il Pass 1 termina senza security finding. Se lo Shield scatta +durante l'harvesting, Zenzic esce immediatamente con codice 2 — nessuna risoluzione di +riferimenti avviene su file che contengono credenziali esposte. + +### Cosa intercetta la pipeline + +| Problema | Tipo | Blocca l'uscita? 
| +| :--- | :---: | :---: | +| **Dangling Reference** — `[testo][id]` dove `id` non ha definizione | errore | Sì | +| **Dead Definition** — `[id]: url` definito ma mai usato da nessun link | warning | No (sì con `--strict`) | +| **Duplicate Definition** — stesso `id` definito due volte; vince il primo (CommonMark §4.7) | warning | No | +| **Alt-text mancante** — `![](url)` o `<img>` con alt vuoto/assente | warning | No | +| **Segreto rilevato** — pattern di credenziale trovato in un URL di riferimento o riga | sicurezza | **Exit 2** | + +### Reference Integrity Score + +Ogni file riceve un punteggio per-file: + +```text +Reference Integrity = (definizioni risolte / definizioni totali) × 100 +``` + +Un file dove ogni definizione è usata almeno una volta ottiene 100. Le definizioni non usate +(dead) abbassano il punteggio. Quando un file non ha definizioni, il punteggio è 100 per +convenzione. + +Il punteggio di integrità è un **diagnostico per-file** — non confluisce nel punteggio di +qualità complessivo di `zenzic score`. Usalo per identificare file che accumulano reference +link boilerplate non usati. + +--- + +## Zenzic Shield + +Lo Shield gira **dentro il Pass 1** — ogni URL estratto da una reference definition viene +scansionato nel momento in cui l'harvester lo incontra, prima che qualsiasi altra elaborazione +continui. Lo Shield applica anche un pass di difesa in profondità alle righe non-definizione +per intercettare segreti nella prosa normale. 
+ +### Pattern di credenziali rilevati + +| Nome pattern | Regex | Cosa intercetta | +| :--- | :--- | :--- | +| `openai-api-key` | `sk-[a-zA-Z0-9]{48}` | Chiavi API OpenAI | +| `github-token` | `gh[pousr]_[a-zA-Z0-9]{36}` | Token personal/OAuth GitHub | +| `aws-access-key` | `AKIA[0-9A-Z]{16}` | Access key ID AWS IAM | +| `stripe-live-key` | `sk_live_[0-9a-zA-Z]{24}` | Chiavi segrete live Stripe | +| `slack-token` | `xox[baprs]-[0-9a-zA-Z]{10,48}` | Token bot/utente/app Slack | +| `google-api-key` | `AIza[0-9A-Za-z\-_]{35}` | Chiavi API Google Cloud / Maps | +| `private-key` | `-----BEGIN [A-Z ]+ PRIVATE KEY-----` | Chiavi private PEM (RSA, EC, ecc.) | + +### Comportamento dello Shield + +- **Ogni riga viene scansionata** — incluse le righe dentro i blocchi di codice delimitati (con o + senza etichetta). Una credenziale committata in un esempio `bash` è comunque una credenziale + committata. +- Il rilevamento è **non sopprimibile** — `--exit-zero`, `exit_zero = true` in `zenzic.toml` e + `--strict` non hanno effetto sui security finding dello Shield. +- Il codice di uscita 2 è riservato **esclusivamente** agli eventi Shield. Non viene mai usato + per i fallimenti ordinari dei controlli. +- I file con security finding sono **esclusi dalla validazione dei link** — Zenzic non fa ping + a URL che potrebbero contenere credenziali esposte. +- **Isolamento dei link nei blocchi di codice** — lo Shield scansiona l'interno dei blocchi + delimitati, ma il validatore di link e riferimenti no. Gli URL di esempio nei blocchi di codice + (es. `https://api.example.com`) non producono mai falsi positivi nei link. + +!!! danger "Se ricevi il codice di uscita 2" + Trattalo come un incidente di sicurezza bloccante. Ruota immediatamente la credenziale + esposta, poi rimuovi o sostituisci l'URL di riferimento incriminato. Non committare il + segreto nella history. 
+ +--- + +## Logica di scansione ibrida + +Zenzic applica regole di scansione diverse alla prosa e ai blocchi di codice perché i due +contesti hanno profili di rischio diversi: + +| Posizione del contenuto | Shield (segreti) | Sintassi snippet | Validazione link/ref | +| :--- | :---: | :---: | :---: | +| Prosa e definizioni di riferimento | ✓ | — | ✓ | +| Blocco delimitato — linguaggio supportato (`python`, `yaml`, `json`, `toml`) | ✓ | ✓ | — | +| Blocco delimitato — linguaggio non supportato (`bash`, `javascript`, …) | ✓ | — | — | +| Blocco delimitato — senza etichetta (` ``` `) | ✓ | — | — | + +**Perché i link sono esclusi dai blocchi delimitati:** gli esempi di documentazione contengono +spesso URL illustrativi (`https://api.example.com/v1/users`) che non esistono come endpoint +reali. Controllarli produrrebbe centinaia di falsi positivi senza alcun valore di sicurezza. + +**Perché i segreti sono inclusi ovunque:** una credenziale incorporata in un esempio `bash` è +comunque un segreto committato. Vive nella git history, viene indicizzato dagli strumenti di +ricerca nel codice e può essere estratto da scanner automatici che non rispettano la formattazione +Markdown. + +**Perché la verifica sintattica è limitata ai parser noti:** validare Bash o JavaScript +richiederebbe parser di terze parti o sottoprocessi, violando il Pilastro No-Subprocess. Zenzic +valida solo ciò che può validare in puro Python. + +--- + +## Accessibilità alt-text + +`zenzic check references` segnala anche le immagini prive di alt text significativo: + +- **Immagini Markdown inline** — `![](url)` o `![ ](url)` (alt string vuota) +- **Tag HTML `<img>`** — `<img src="...">` senza attributo `alt`, o con `alt` composto da soli spazi + +Un `alt=""` esplicitamente vuoto viene trattato come decorativo intenzionale e **non** viene +segnalato. Un attributo `alt` completamente assente, o alt text con solo spazi, viene segnalato +come warning. 
+ +I finding di alt-text sono warning — appaiono nel report ma non influenzano il codice di +uscita a meno che `--strict` non sia attivo. + +--- + +## Utilizzo programmatico + +Importa le funzioni scanner di Zenzic direttamente nei tuoi tool Python. + +### Scansione di un singolo file + +Usa `ReferenceScanner` per eseguire la pipeline a tre pass su un singolo file: + +```python +from pathlib import Path +from zenzic.core.scanner import ReferenceScanner + +scanner = ReferenceScanner(Path("docs/guide.md")) + +# Pass 1 — harvest definizioni; raccoglie i security finding +security_findings = [] +for lineno, event_type, data in scanner.harvest(): + if event_type == "SECRET": + security_findings.append(data) + # In produzione: raise SystemExit(2) o typer.Exit(2) qui + +# Pass 2 — risolve i reference link (deve essere dopo harvest) +cross_check_findings = scanner.cross_check() + +# Pass 3 — calcola il punteggio di integrità e consolida tutti i finding +report = scanner.get_integrity_report(cross_check_findings, security_findings) + +print(f"Punteggio integrità: {report.score:.1f}") +for f in report.findings: + level = "WARN" if f.is_warning else "ERROR" + print(f" [{level}] {f.file_path}:{f.line_no} — {f.detail}") +``` + +### Scansione multi-file + +Usa `scan_docs_references_with_links` per scansionare ogni file `.md` in un repository e +facoltativamente validare gli URL esterni: + +```python +from pathlib import Path +from zenzic.core.scanner import scan_docs_references_with_links +from zenzic.models.config import ZenzicConfig + +config, _ = ZenzicConfig.load(Path(".")) + +reports, link_errors = scan_docs_references_with_links( + Path("."), + validate_links=True, # imposta False per saltare la validazione HTTP + config=config, +) + +for report in reports: + if report.security_findings: + raise SystemExit(2) # il tuo codice è responsabile dell'applicazione del codice di uscita + for finding in report.findings: + print(finding) + +for error in link_errors: + print(f"[LINK] 
{error}") +``` + +`scan_docs_references_with_links` deduplica gli URL esterni sull'intero albero della +documentazione prima di inviare richieste HTTP — 50 file che linkano allo stesso URL producono +esattamente una richiesta HEAD. + +### Scansione parallela (repository grandi) + +Per repository con più di ~200 file Markdown, usa `scan_docs_references_parallel`: + +```python +from pathlib import Path +from zenzic.core.scanner import scan_docs_references_parallel + +reports = scan_docs_references_parallel(Path("."), workers=4) +``` + +La modalità parallela usa `ProcessPoolExecutor`. La validazione degli URL esterni non è +disponibile in modalità parallela — usa `scan_docs_references_with_links` per la scansione +sequenziale con validazione dei link. + +--- + +## Esclusione di code block e frontmatter + +L'harvester e il cross-checker saltano entrambi il contenuto che non dovrebbe mai produrre +finding: + +- **YAML frontmatter** — il blocco `---` iniziale (solo prima riga) viene saltato per intero, + inclusa qualsiasi sintassi simile a reference che potrebbe contenere. +- **Fenced code block** — le righe dentro i fence ` ``` ` o `~~~` vengono ignorate. Gli URL + negli esempi di codice non producono mai falsi positivi. + +Questa esclusione viene applicata in modo coerente sia nel Pass 1 che nel Pass 2. + +--- + +## Documentazione multilingue + +Quando il tuo progetto usa i18n MkDocs o il sistema di locale di Zensical, Zenzic si adatta +automaticamente: + +- **Directory locale soppresse dal rilevamento orfani** — i file sotto `docs/it/`, `docs/fr/`, + ecc. non vengono segnalati come orfani. L'adapter rileva le directory locale dalla + configurazione i18n dell'engine. +- **Risoluzione dei link cross-locale** — gli adapter MkDocs e Zensical risolvono i link che + attraversano i confini di locale senza falsi positivi. 
+- **La modalità Vanilla salta completamente il controllo orfani** — quando non è presente alcuna + config del motore di build, ogni file sembrerebbe un orfano. Zenzic salta il controllo + piuttosto che segnalare rumore. + +!!! tip "Forza la modalità Vanilla per sopprimere il controllo orfani" + ```bash + zenzic check all --engine vanilla + ``` + +[syntax]: https://spec.commonmark.org/0.31.2/#link-reference-definitions diff --git a/docs/it/usage/badges.md b/docs/it/usage/badges.md index 6fa1c1a..b964b1b 100644 --- a/docs/it/usage/badges.md +++ b/docs/it/usage/badges.md @@ -69,7 +69,7 @@ Il flag `--fail-under` disaccoppia l'exit code dal valore del punteggio. I team `zenzic score --save` scrive `.zenzic-score.json` alla root del repository: -```json +```text {"project": "zenzic", "score": 100, "threshold": 0, "status": "success", "timestamp": "...", "categories": [...]} ``` diff --git a/docs/it/usage/commands.md b/docs/it/usage/commands.md new file mode 100644 index 0000000..da85d51 --- /dev/null +++ b/docs/it/usage/commands.md @@ -0,0 +1,249 @@ +--- +icon: lucide/terminal +--- + + + + +# Comandi CLI + +Riferimento completo per ogni comando, flag e codice di uscita Zenzic. 
+ +--- + +## Controlli + +```bash +# Controlli individuali +zenzic check links # Link interni; aggiungi --strict per la validazione HTTP esterna +zenzic check orphans # Pagine su disco mancanti dalla nav +zenzic check snippets # Blocchi Python che non compilano +zenzic check placeholders # Pagine stub: basso conteggio parole o pattern vietati +zenzic check assets # File media non referenziati da nessuna pagina +zenzic check references # Reference-style link + Zenzic Shield (rilevamento credenziali) + +# Tutti i controlli in sequenza +zenzic check all # Esegue tutti i controlli +zenzic check all --strict # Valida anche gli URL esterni; tratta i warning come errori +zenzic check all --format json # Output machine-readable +zenzic check all --exit-zero # Segnala problemi ma esce sempre con codice 0 +zenzic check all --engine mkdocs # Sovrascrive il motore rilevato +``` + +### Flag `--strict` + +| Comando | Effetto | +| :--- | :--- | +| `check links --strict` | Valida gli URL HTTP/HTTPS esterni via richieste di rete concorrenti | +| `check all --strict` | Valida gli URL esterni + tratta i warning come errori | +| `check references --strict` | Tratta le Dead Definitions (reference link non usati) come errori bloccanti | +| `score --strict` / `diff --strict` | Esegue il link check in modalità strict | + +Puoi anche impostare `strict = true` in `zenzic.toml` per renderlo il default permanente. + +### Flag `--exit-zero` + +Esce sempre con codice `0` anche quando vengono trovati problemi. Tutti i problemi vengono +comunque stampati e inclusi nel punteggio — solo il codice di uscita viene soppresso. Utile per +pipeline di sola osservazione. + +Puoi anche impostare `exit_zero = true` in `zenzic.toml` per renderlo il default permanente. 
+ +--- + +## Autofix & Cleanup + +```bash +zenzic clean assets # Elimina gli asset non utilizzati interattivamente (prompt per ognuno) +zenzic clean assets -y # Elimina gli asset non utilizzati immediatamente (senza prompt) +zenzic clean assets --dry-run # Mostra cosa verrebbe eliminato senza farlo +``` + +`zenzic clean assets` rispetta `excluded_assets`, `excluded_dirs` e +`excluded_build_artifacts` da `zenzic.toml` — non eliminerà mai i file che corrispondono a +questi pattern. + +--- + +## Server di sviluppo + +```bash +zenzic serve # Avvia il server con pre-flight di qualità +zenzic serve --engine mkdocs # Forza un motore specifico +zenzic serve --engine zensical +zenzic serve --port 9000 # Porta di partenza personalizzata (prova fino a 10 porte consecutive) +zenzic serve -p 9000 +zenzic serve --no-preflight # Salta il pre-flight e avvia il server immediatamente +``` + +`zenzic serve` rileva automaticamente il motore di documentazione dalla root del repository: + +| File di config presente | Binario disponibile | Risultato | +| :--- | :--- | :--- | +| `zensical.toml` | `zensical` o `mkdocs` | Avvia il motore disponibile | +| `zensical.toml` | nessuno | Errore — installa un motore | +| solo `mkdocs.yml` | `mkdocs` o `zensical` | Avvia il motore disponibile | +| solo `mkdocs.yml` | nessuno | Errore — installa un motore | +| nessuno | qualsiasi | Server statico su `site/` (senza hot-reload) | + +Prima di avviare il server, Zenzic esegue un pre-flight silenzioso (orfani, snippet, +placeholder, asset non usati). I problemi vengono stampati come warning ma non bloccano mai +l'avvio. La validazione dei link esterni è esclusa intenzionalmente dal pre-flight. Usa +`--no-preflight` per saltare il controllo qualità quando sei nel mezzo di una fix. + +**Gestione della porta.** Zenzic individua una porta libera tramite socket probe prima di +avviare il subprocess dell'engine, poi passa `--dev-addr 127.0.0.1:{porta}` a mkdocs o +zensical. 
L'errore `Address already in use` non può mai provenire dall'engine. + +--- + +## Codici di uscita + +| Codice | Significato | +| :---: | :--- | +| `0` | Tutti i controlli selezionati sono passati (o `--exit-zero` era impostato) | +| `1` | Uno o più controlli hanno segnalato problemi | +| **`2`** | **SECURITY CRITICAL — Zenzic Shield ha rilevato una credenziale esposta** | + +!!! danger "Il codice di uscita 2 è riservato agli eventi di sicurezza" + Il codice 2 viene emesso esclusivamente da `zenzic check references` quando lo Shield + rileva un pattern di credenziale noto incorporato in un URL di riferimento. Non viene mai + usato per i fallimenti ordinari dei controlli. Se ricevi il codice di uscita 2, trattalo + come un incidente di sicurezza bloccante e **ruota immediatamente la credenziale esposta**. + +--- + +## Output JSON + +Passa `--format json` a `check all` per output strutturato: + +```bash +zenzic check all --format json | jq '.orphans' +zenzic check all --format json > report.json +``` + +Il report JSON contiene sette chiavi: + +```json +{ + "links": [], + "orphans": [], + "snippets": [], + "placeholders": [], + "unused_assets": [], + "references": [], + "nav_contract": [] +} +``` + +Ogni chiave contiene una lista di stringhe o oggetti con i problemi. Una lista vuota significa +che il controllo è passato. `nav_contract` valida i link `extra.alternate` in `mkdocs.yml` +rispetto alla Virtual Site Map — sempre vuoto per i progetti non MkDocs. + +--- + +## Override dell'engine + +Il flag `--engine` sovrascrive l'adapter del motore di build per una singola esecuzione senza +modificare `zenzic.toml`. 
Accettato da `check orphans` e `check all`: + +```bash +zenzic check orphans --engine mkdocs +zenzic check all --engine zensical +zenzic check all --engine vanilla # disabilita il controllo orfani indipendentemente dalla config +``` + +Se passi un nome di engine per cui non esiste un adapter registrato, Zenzic elenca gli adapter +disponibili ed esce con codice 1: + +```text +ERROR: Unknown engine adapter 'hugo'. +Installed adapters: mkdocs, vanilla, zensical +Install a third-party adapter or choose from the list above. +``` + +Gli adapter di terze parti vengono scoperti automaticamente una volta installati — nessun +aggiornamento Zenzic richiesto. Vedi [Scrivere un Adapter](../developers/writing-an-adapter.md). + +--- + +## Punteggio qualità + +I controlli individuali rispondono a una domanda binaria: passa o fallisce. `zenzic score` +risponde a una diversa: *quanto è sana questa documentazione, e sta migliorando o peggiorando +nel tempo?* + +```bash +zenzic score # Calcola punteggio 0–100 +zenzic score --save # Calcola e persiste snapshot in .zenzic-score.json +zenzic score --fail-under 80 # Esce con 1 se il punteggio è sotto la soglia +zenzic score --format json # Report punteggio machine-readable + +zenzic diff # Confronta punteggio attuale con snapshot salvato +zenzic diff --threshold 5 # Esce con 1 solo se il calo è superiore a 5 punti +zenzic diff --format json # Report diff machine-readable +``` + +### Come è calcolato il punteggio + +Ogni categoria di controllo porta un peso fisso che riflette il suo impatto sull'esperienza del +lettore: + +| Categoria | Peso | Rationale | +| :--- | ---: | :--- | +| links | 35 % | Un link non valido è un vicolo cieco immediato per il lettore | +| orphans | 20 % | Le pagine irraggiungibili sono invisibili — potrebbero non esistere | +| snippets | 20 % | Esempi di codice non validi fuorviano attivamente gli sviluppatori | +| placeholders | 15 % | Il contenuto stub segnala una pagina incompiuta o abbandonata | +| assets | 10 % 
| Gli asset non usati sono spreco, ma non bloccano il lettore | + +All'interno di ogni categoria, il punteggio decade linearmente: il primo problema costa il 20% +del peso della categoria, il secondo ne costa altri 20%, con un minimo di zero. Una categoria +con cinque o più problemi non contribuisce nulla al totale. I contributi ponderati vengono +sommati e arrotondati a un intero. + +### Tracciare le regressioni + +```bash +# Sul branch main — stabilisce o aggiorna il baseline +zenzic score --save + +# Su ogni pull request — blocca le regressioni della documentazione +zenzic diff --threshold 5 +``` + +`--threshold 5` dà ai collaboratori un margine di cinque punti. Impostalo a `0` per un gate +rigoroso dove qualsiasi regressione fa fallire la pipeline. + +### Punteggio minimo + +```bash +zenzic score --fail-under 80 +``` + +Utile quando il team si è impegnato a mantenere un livello di qualità definito, indipendentemente +da quello che era il punteggio la settimana scorsa. Puoi anche impostare `fail_under = 80` in +`zenzic.toml` per renderlo persistente. + +### Reporting soft + +Per rendere visibile il punteggio senza bloccare la pipeline: + +```bash +zenzic check all --exit-zero # report completo, esce 0 comunque +zenzic score # mostra il punteggio per visibilità +``` + +--- + +## `uvx` vs `uv run` vs `zenzic` diretto + +| Invocazione | Comportamento | Quando usare | +| :--- | :--- | :--- | +| `uvx zenzic ...` | Scarica ed esegue in un ambiente **isolato ed effimero** | Job una-tantum, pre-commit hook, CI senza fase di install del progetto | +| `uv run zenzic ...` | Esegue dal **virtual environment del progetto** (richiede `uv sync`) | Quando Zenzic è in `pyproject.toml` e serve comportamento version-pinned | +| `zenzic ...` (diretto) | Richiede Zenzic nel `$PATH` | Macchine developer con install globale | + +!!! 
tip "Raccomandazione CI" + Preferisci `uvx zenzic ...` per step CI che non installano già le dipendenze del progetto — + evita di aggiungere Zenzic all'insieme delle dipendenze di produzione. diff --git a/docs/it/usage/index.md b/docs/it/usage/index.md index a9758bb..5ee9620 100644 --- a/docs/it/usage/index.md +++ b/docs/it/usage/index.md @@ -5,11 +5,10 @@ icon: lucide/play -# Utilizzo +# Per iniziare -Zenzic legge direttamente dal filesystem e funziona con qualsiasi progetto, inclusi quelli che -non usano MkDocs come motore di build. Puoi usarlo in locale, come hook di pre-commit o nelle -tue pipeline CI. +Zenzic legge direttamente dal filesystem e funziona con qualsiasi progetto basato su Markdown. +Usalo in locale, come hook di pre-commit, nelle pipeline CI o per audit una-tantum. !!! tip "Vuoi eseguirlo subito?" @@ -21,6 +20,112 @@ tue pipeline CI. --- +## Installazione + +### Temporanea — nessuna installazione richiesta + +=== ":simple-astral: uv" + + ```bash + uvx zenzic check all + ``` + + `uvx` risolve ed esegue Zenzic da PyPI in un ambiente temporaneo. Nulla viene installato sul + sistema. La scelta giusta per audit una-tantum, `git hooks` e job CI dove si vuole evitare di + fissare una dipendenza dev. + +=== ":simple-pypi: pip" + + ```bash + pip install zenzic + zenzic check all + ``` + + Installazione standard nell'ambiente attivo. Usa all'interno di un virtual environment per + mantenere pulito il Python di sistema. + +### Strumento globale — disponibile in ogni progetto + +=== ":simple-astral: uv" + + ```bash + uv tool install zenzic + zenzic check all + ``` + + Installa una volta, usa in qualsiasi progetto. Il binario è disponibile nel `PATH` senza + attivare un virtual environment. + +=== ":simple-pypi: pip" + + ```bash + python -m venv ~/.local/zenzic-env + source ~/.local/zenzic-env/bin/activate # Windows: .venv\Scripts\activate + pip install zenzic + ``` + + Installa in un virtual environment dedicato, poi aggiungi la directory `bin/` al `PATH`. 
+ +### Dipendenza dev del progetto — versione fissata per progetto + +=== ":simple-astral: uv" + + ```bash + uv add --dev zenzic + uv run zenzic check all + ``` + + Installa Zenzic nel virtual environment del progetto e fissa la versione in `uv.lock`. + La scelta giusta per progetti di team e pipeline CI che installano le dipendenze del progetto + prima di eseguire i controlli. + +=== ":simple-pypi: pip" + + ```bash + python -m venv .venv + source .venv/bin/activate # Windows: .venv\Scripts\activate + pip install zenzic + zenzic check all + ``` + + Pattern standard da dipendenza dev con virtual environment locale al progetto. + +### Validazione MkDocs — extra `zenzic[docs]` + +Il core engine di Zenzic è privo di dipendenze per design: validare link, snippet, riferimenti +e Shield richiede solo `zenzic`. Lo stack MkDocs (tema Material, plugin, ecc.) è necessario +solo per **renderizzare** la documentazione — non per validarla. + +Se il tuo progetto usa `mkdocs.yml` e vuoi che Zenzic lo validi come parte dei controlli, +installa l'extra opzionale: + +=== ":simple-astral: uv" + + ```bash + # Aggiungi l'extra [docs] insieme a Zenzic + uv add --dev "zenzic[docs]" + + # Oppure come esecuzione temporanea: + uvx "zenzic[docs]" check all + ``` + +=== ":simple-pypi: pip" + + ```bash + pip install "zenzic[docs]" + ``` + +L'extra `[docs]` installa `mkdocs-material`, `mkdocstrings`, `mkdocs-minify-plugin` e +`mkdocs-static-i18n` — lo stesso stack usato per costruire il sito di documentazione di +Zenzic. Se esegui solo `zenzic check all` senza renderizzare il sito, ometti l'extra. + +!!! note "Hugo, Zensical e altri engine" + L'extra `[docs]` è specifico per MkDocs. Per Zensical e altri adapter, installa il + pacchetto adapter di terze parti corrispondente (es. `pip install zenzic-hugo-adapter`). + Nessun extra è richiesto per `VanillaAdapter` (cartelle Markdown semplici). 
+ +--- + ## Workflow Init → Config → Check Il workflow standard per adottare Zenzic in un progetto: @@ -49,7 +154,7 @@ engine-agnostico (modalità Vanilla). In entrambi i casi, tutte le impostazioni per default — decommenta e modifica solo i campi di cui hai bisogno. Eseguendo Zenzic senza un `zenzic.toml`, Zenzic utilizza i valori predefiniti e mostra un -pannello "Helpful Hint" che suggerisce `zenzic init`: +pannello Helpful Hint che suggerisce `zenzic init`: ```text ╭─ 💡 Zenzic Tip ─────────────────────────────────────────────────────╮ @@ -66,11 +171,11 @@ Modifica il `zenzic.toml` generato per silenziare il rumore e impostare soglie a ```toml # zenzic.toml — nella root del repository excluded_assets = [ - "assets/favicon.svg", + "assets/favicon.svg", # referenziato da mkdocs.yml, non da nessuna pagina .md "assets/social-preview.png", ] -placeholder_max_words = 30 -fail_under = 70 +placeholder_max_words = 30 # le pagine di reference tecnico sono intenzionalmente brevi +fail_under = 70 # stabilisce un quality floor iniziale ``` Consulta la [Guida alla Configurazione](../configuration/index.md) per l'elenco completo dei campi. 
@@ -80,14 +185,19 @@ Consulta la [Guida alla Configurazione](../configuration/index.md) per l'elenco Con il baseline stabilito, esegui Zenzic su ogni commit e pull request: ```bash +# Hook pre-commit o step CI zenzic check all --strict -zenzic score --save # salva baseline sul branch main -zenzic diff --threshold 5 # blocca le PR che regrediscono il baseline + +# Salva il baseline qualità sul branch main +zenzic score --save + +# Blocca le PR che regrediscono il baseline +zenzic diff --threshold 5 ``` --- -## Modalità Vanilla vs Modalità Engine-aware +## Modalità engine Zenzic opera in una di due modalità a seconda che riesca a trovare un file di configurazione del motore di build: @@ -97,481 +207,38 @@ del motore di build: Quando `mkdocs.yml` (MkDocs/Zensical) o `zensical.toml` (Zensical) è presente nella root del repository, Zenzic carica l'**adapter** corrispondente che fornisce: -- **Consapevolezza della nav** — il controllo orfani sa esattamente quali file dovrebbero essere - nella nav e quali no (ad esempio i file di locale i18n). -- **Fallback i18n** — i link cross-locale vengono risolti correttamente. +- **Consapevolezza della nav** — il controllo orfani sa la differenza tra "non nella nav" e "non + dovrebbe essere nella nav" (ad esempio i file di locale i18n). +- **Fallback i18n** — i link cross-locale vengono risolti correttamente invece di essere + segnalati come non validi. - **Soppressione directory locale** — i file sotto `docs/it/`, `docs/fr/`, ecc. non vengono segnalati come orfani. ### Modalità Vanilla -Quando non viene trovata alcuna configurazione del motore di build — o quando viene specificato un -nome di engine sconosciuto — Zenzic ricade su `VanillaAdapter`: +Quando non viene trovata alcuna configurazione del motore di build, Zenzic ricade su +`VanillaAdapter`. In questa modalità: -- **Il controllo orfani viene saltato.** Senza una dichiarazione di nav, ogni file Markdown - sembrerebbe un orfano, rendendo il controllo inutile. 
+- **Il controllo orfani viene saltato.** Senza una dichiarazione di nav, ogni file sembrerebbe + un orfano. - **Tutti gli altri controlli vengono eseguiti normalmente** — link, snippet, placeholder, asset - e riferimenti vengono tutti validati. + e riferimenti. -Modalità Vanilla è la scelta giusta per wiki Markdown semplici, repository GitHub-wiki, o +La modalità Vanilla è la scelta giusta per wiki Markdown semplici, repository GitHub-wiki o qualsiasi progetto dove la navigazione è implicita. !!! tip "Forza una modalità specifica" Usa `--engine` per sovrascrivere l'adapter rilevato per una singola esecuzione: ```bash - zenzic check all --engine vanilla # salta il controllo orfani - zenzic check all --engine mkdocs # forza l'adapter MkDocs + zenzic check all --engine vanilla # salta il controllo orfani + zenzic check all --engine mkdocs # forza l'adapter MkDocs ``` --- -## Opzioni di installazione - -### Temporanea — nessuna installazione richiesta - -```bash -uvx zenzic check all -``` - -`uvx` risolve ed esegue Zenzic da PyPI in un ambiente temporaneo. Nulla viene installato sul sistema. È la scelta giusta per audit una-tantum, `git hooks` e job CI dove si vuole evitare di fissare una dipendenza dev. - -### Strumento globale — disponibile in ogni progetto - -```bash -uv tool install zenzic -zenzic check all -``` - -Installa una volta, usa in qualsiasi progetto. Il binario è disponibile nel `PATH` senza attivare un virtual environment. - -### Dipendenza dev del progetto — versione fissata per progetto - -```bash -uv add --dev zenzic -uv run zenzic check all -``` - -Installa Zenzic nel virtual environment del progetto e fissa la versione in `uv.lock`. Scelta giusta per progetti di team. 
- -### Comandi - -```bash -# Controlli individuali -zenzic check links # Link interni; aggiungi --strict per la validazione HTTP esterna -zenzic check orphans # Pagine su disco mancanti dalla nav -zenzic check snippets # Blocchi Python che non compilano -zenzic check placeholders # Pagine stub: basso conteggio parole o pattern vietati -zenzic check assets # File media non referenziati da nessuna pagina - -# Autofix & Cleanup -zenzic clean assets # Elimina interattivamente gli asset non utilizzati -zenzic clean assets -y # Elimina gli asset non utilizzati immediatamente -zenzic clean assets --dry-run # Mostra cosa verrebbe eliminato senza farlo - -# Pipeline di riferimento (v0.2.0) -zenzic check references # Harvest → Cross-Check → Shield → Integrity score -zenzic check references --strict # Tratta le Dead Definitions come errori -zenzic check references --links # Valida anche gli URL via HTTP asincrono (1 ping/URL) - -# Tutti i controlli in sequenza -zenzic check all # Esegue tutti e sei i controlli -zenzic check all --strict # Tratta i warning come errori -zenzic check all --format json # Output machine-readable -zenzic check all --exit-zero # Segnala problemi ma esce sempre con codice 0 - -# Punteggio qualità -zenzic score # Calcola punteggio 0–100 -zenzic score --save # Calcola e persiste snapshot in .zenzic-score.json -zenzic score --fail-under 80 # Esce con 1 se il punteggio è sotto la soglia -zenzic score --format json # Report punteggio machine-readable - -# Rilevamento regressioni -zenzic diff # Confronta punteggio attuale con snapshot salvato -zenzic diff --threshold 5 # Esce con 1 solo se il calo è > 5 punti -zenzic diff --format json # Report diff machine-readable - -``` - -### Autofix & Cleanup - -Invece di limitarsi a segnalare i problemi, Zenzic può ripulire attivamente il tuo repository. 
`zenzic clean assets` legge la documentazione, trova tutti i file non utilizzati in `docs_dir` (rispettando rigorosamente `excluded_assets`, `excluded_dirs` e `excluded_build_artifacts`), e ti chiede conferma per eliminarli. Usa `--dry-run` per visualizzare un'anteprima in sicurezza o `-y` per automatizzare l'eliminazione nelle pipeline CI. - -### Server di sviluppo - -```bash -# Avvia il server con pre-flight di qualità -zenzic serve - -# Forza un motore specifico -zenzic serve --engine mkdocs -zenzic serve --engine zensical - -# Porta personalizzata (scansiona fino a 10 porte consecutive se occupata) -zenzic serve --port 9000 -zenzic serve -p 9000 - -# Salta il pre-flight e avvia direttamente il server -zenzic serve --no-preflight -``` - -`zenzic serve` rileva automaticamente il motore di documentazione dalla root del repository: - -| File di config presente | Binario disponibile | Risultato | -| :--- | :--- | :--- | -| `zensical.toml` | `zensical` o `mkdocs` | Avvia il motore disponibile | -| `zensical.toml` | nessuno | Errore — installa un motore | -| solo `mkdocs.yml` | `mkdocs` o `zensical` | Avvia il motore disponibile | -| solo `mkdocs.yml` | nessuno | Errore — installa un motore | -| nessuno | qualsiasi | Server statico su `site/` (senza hot-reload) | - -`zensical.toml` ha sempre la precedenza perché Zensical è un superset di MkDocs e legge `mkdocs.yml` nativamente. Il fallback statico permette a `zenzic serve` di funzionare in qualsiasi ambiente — anche senza mkdocs o zensical installati — purché esista una directory `site/` pre-compilata. - -Quando `--engine` è specificato esplicitamente, Zenzic verifica sia che il binario sia nel `$PATH` sia che il file di config richiesto esista. `--engine zensical` accetta `mkdocs.yml` come config valida per retrocompatibilità. - -**Gestione della porta.** Zenzic individua una porta libera tramite socket probe *prima* di avviare il subprocess dell'engine, poi passa `--dev-addr 127.0.0.1:{porta}` a mkdocs o zensical. 
Questo elimina l'errore `Address already in use` dall'engine: se la porta richiesta (default `8000`) è occupata, Zenzic prova silenziosamente le porte successive fino a dieci volte e indica quale porta viene effettivamente usata. - -Prima di avviare il server, Zenzic esegue un controllo pre-flight silenzioso — orfani, snippet, placeholder e asset non usati. I problemi vengono stampati come warning ma non bloccano mai l'avvio. La validazione dei link esterni (`check links --strict`) è intenzionalmente esclusa dal pre-flight: non ha senso attendere i roundtrip di rete quando stai per modificare la documentazione live. - -Il processo server eredita il terminale, quindi i log di hot-reload e l'output delle richieste appaiono non filtrati. Usa `--no-preflight` per saltare il controllo qualità quando sei nel mezzo di una fix e non hai bisogno del rumore. - -### Codici di uscita - -| Codice | Significato | -| :---: | :--- | -| `0` | Tutti i controlli selezionati sono passati (o `--exit-zero` era impostato) | -| `1` | Uno o più controlli hanno segnalato problemi | -| **`2`** | **SECURITY CRITICAL — Zenzic Shield ha rilevato una credenziale esposta** | - -!!! danger "Il codice di uscita 2 è riservato agli eventi di sicurezza" - Il codice 2 viene emesso esclusivamente da `zenzic check references` quando lo Shield - rileva un pattern di credenziale noto incorporato in un URL di riferimento. Non viene mai - usato per i fallimenti ordinari dei controlli. Se ricevi il codice di uscita 2, trattalo - come un incidente di sicurezza bloccante e **ruota immediatamente la credenziale esposta**. - -### Override dell'adapter engine - -Il flag `--engine` sovrascrive l'adapter del motore di build per una singola esecuzione senza -modificare `zenzic.toml`. 
È accettato da `check orphans` e `check all`: - -```bash -# Forza l'adapter MkDocs anche se zenzic.toml dice altro -zenzic check orphans --engine mkdocs -zenzic check all --engine mkdocs - -# Usa l'adapter Zensical (richiede che zensical.toml sia presente) -zenzic check orphans --engine zensical -zenzic check all --engine zensical -``` - -Se passi un nome di engine per cui non esiste un adapter registrato, Zenzic elenca gli adapter -disponibili ed esce con codice 1: - -```text -ERROR: Unknown engine adapter 'hugo'. -Installed adapters: mkdocs, vanilla, zensical -Install a third-party adapter or choose from the list above. -``` - ---- - -### Output JSON - -Passa `--format json` a `check all` per output strutturato: - -```bash -zenzic check all --format json | jq '.orphans' -zenzic check all --format json > report.json -``` - -Il report JSON contiene cinque chiavi che corrispondono ai nomi dei controlli: `links`, `orphans`, `snippets`, `placeholders`, `unused_assets`. Una lista vuota indica che il controllo è passato. - -### Punteggio qualità - -I controlli individuali rispondono a una domanda binaria: passa o fallisce. `zenzic score` risponde a una diversa: *quanto è sana questa documentazione, e sta migliorando o peggiorando nel tempo?* - -`zenzic score` esegue tutti e sei i controlli e aggrega i risultati in un singolo intero tra 0 e 100. Il punteggio è deterministico — a parità di stato della documentazione, produce sempre lo stesso numero — il che lo rende sicuro da tracciare nel controllo di versione e da confrontare tra branch. 
- -### Come è calcolato il punteggio - -Ogni categoria di controllo porta un peso fisso che riflette il suo impatto sull'esperienza del lettore: - -| Categoria | Peso | Rationale | -| :--- | ---: | :--- | -| links | 35 % | Un link non valido è un vicolo cieco immediato per il lettore | -| orphans | 20 % | Le pagine irraggiungibili sono invisibili | -| snippets | 20 % | Esempi di codice non validi fuorviano attivamente gli sviluppatori | -| placeholders | 15 % | Il contenuto stub segnala una pagina incompiuta o abbandonata | -| assets | 10 % | Gli asset non usati sono spreco, ma non bloccano il lettore | - -All'interno di ogni categoria, il punteggio decade linearmente: il primo problema costa 20 punti su 100 per quella categoria, il secondo ne costa altri 20, e così via, con un minimo di zero. Una categoria con cinque o più problemi non contribuisce nulla al totale. I contributi ponderati vengono sommati e arrotondati a un intero. - -Questo significa che un singolo link non valido fa scendere il punteggio totale di circa 7 punti (peso del 35% × decadimento del 20%), mentre un singolo asset non utilizzato costa circa 2 punti. I pesi codificano un giudizio intenzionale sulla gravità. - -### Tracciare le regressioni in CI - -Il punteggio diventa più utile se confrontato con un baseline noto. Il flag `--save` scrive il report corrente su `.zenzic-score.json` nella root del repository. Una volta che esiste un baseline, `zenzic diff` calcola il delta ed esce con codice non-zero se la documentazione è regredita. - -Un tipico setup CI su un progetto di team: - -```bash -# Stabilisce o aggiorna il baseline sul branch main -zenzic score --save - -# Su ogni pull request, blocca i merge che degradano la qualità -zenzic diff --threshold 5 -``` - -`--threshold 5` dà ai collaboratori un margine di cinque punti — piccoli cambiamenti non correlati (una nuova pagina stub, un commento TODO temporaneo) non bloccano una PR. 
Impostalo a `0` per un gate rigoroso dove qualsiasi regressione fa fallire la pipeline. - -### Imporre un punteggio minimo - -Usa `--fail-under` quando vuoi un floor assoluto piuttosto che un controllo relativo: - -```bash -zenzic score --fail-under 80 -``` - -È utile per le policy di documentation-as-a-feature dove il team si è impegnato a mantenere un livello di qualità definito, indipendentemente da quello che era il punteggio la settimana scorsa. - -### Reporting soft - -Per rendere visibile il punteggio senza bloccare la pipeline — utile durante uno sprint attivo di miglioramento della documentazione — combina `check all --exit-zero` con `score` in step separati: - -```bash -zenzic check all --exit-zero # report completo, esce 0 comunque -zenzic score # mostra il punteggio per visibilità -``` - ---- - -## Integrità dei riferimenti (v0.2.0) - -`zenzic check references` è il controllo più approfondito della suite. A differenza degli altri -controlli, che operano sulle singole pagine in isolamento, la pipeline di riferimento costruisce -una **vista globale** di tutte le Reference Definitions Markdown nell'intera documentazione prima -di validare qualsiasi utilizzo. - -### Perché due pass? - -Uno scanner single-pass produrrebbe falsi positivi per i *forward reference* — casi in cui -`[testo][id]` appare su una pagina prima che `[id]: url` sia definito più avanti nello stesso -file. La [Two-Pass Pipeline][arch-two-pass] risolve questo in modo pulito: - -- **Pass 1 — Harvesting**: legge ogni file, raccoglie tutte le definizioni `[id]: url` in una - [ReferenceMap][arch-refmap] per-file ed esegue lo Zenzic Shield su ogni URL. -- **Pass 2 — Cross-Check**: risolve ogni utilizzo `[testo][id]` rispetto alla ReferenceMap - completamente popolata e segnala le Dangling References. -- **Pass 3 — Integrity**: calcola il punteggio di integrità per-file dai dati di risoluzione. - -!!! 
warning "Non unire i pass" - Unire Harvesting e Cross-Check in un singolo ciclo produce falsi errori *Phantom Reference* - sui forward reference — un pattern comune nei grandi progetti di documentazione. La - separazione in due pass non è un'ottimizzazione; è un requisito di correttezza. - -### Comandi - -```bash -zenzic check references # Pipeline completa: Harvest → Cross-Check → Shield → score -zenzic check references --strict # Tratta le Dead Definitions come errori bloccanti -zenzic check references --links # Valida anche gli URL via HTTP asincrono (1 ping/URL) -``` - -`--links` attiva la [deduplicazione URL globale][arch-dedup]: ogni URL unico tra tutti i file -viene pingato esattamente una volta, indipendentemente da quante definizioni vi fanno riferimento. - -### Zenzic Shield - -!!! danger "Sicurezza — Exit Code 2" - Se `zenzic check references` esce con codice **2**, è stato trovato un segreto incorporato - in un URL di riferimento nella documentazione. **Ruota immediatamente la credenziale esposta.** - -Lo Shield scansiona ogni URL di riferimento durante il Pass 1 — prima che il Pass 2 validi i -link e prima che `--links` emetta qualsiasi richiesta HTTP. Un documento contenente una -credenziale esposta non viene mai usato per fare richieste in uscita. - -| Tipo di credenziale | Pattern | -| :--- | :--- | -| OpenAI API key | `sk-[a-zA-Z0-9]{48}` | -| GitHub token | `gh[pousr]_[a-zA-Z0-9]{36}` | -| AWS access key | `AKIA[0-9A-Z]{16}` | - -!!! tip "Integrazione pre-commit" - Aggiungi `zenzic check references` ai tuoi [pre-commit hook][pre-commit] per rilevare le - credenziali esposte prima che vengano mai committate nel controllo di versione. - -### Punteggio di integrità - -Ogni file riceve un **punteggio di integrità** per-file (0–100): il rapporto tra Reference -Definitions *usate* e quelle *totali*. Un punteggio di 100 significa che ogni definizione è -referenziata almeno una volta; punteggi più bassi indicano Dead Definitions. 
- -$$ -Reference\ Integrity = \frac{Resolved\ References}{Total\ Reference\ Definitions} -$$ - -Usa `--strict` per trattare le Dead Definitions come errori bloccanti e far fallire la pipeline -quando un file scende sotto il 100%. - ---- - -## Integrazione CI/CD - -Zenzic è progettato per workflow pipeline-first. Tutti i comandi escono con codice non-zero in -caso di fallimento — nessun wrapper aggiuntivo richiesto. - -### `uvx` vs `uv run` vs `zenzic` diretto - -| Invocazione | Comportamento | Quando usare | -| :--- | :--- | :--- | -| `uvx zenzic ...` | Scarica ed esegue Zenzic in un ambiente **isolato ed effimero** | Job una-tantum, pre-commit hook, step CI senza fase di install del progetto | -| `uv run zenzic ...` | Esegue Zenzic dal **virtual environment del progetto** (richiede `uv sync`) | Quando Zenzic è in `pyproject.toml` e serve comportamento version-pinned | -| `zenzic ...` (diretto) | Richiede Zenzic nel `$PATH` (dopo `uv tool install` o `pip install`) | Macchine developer con install globale persistente | - -!!! tip "Raccomandazione CI" - Preferisci `uvx zenzic ...` per step CI che non installano già le dipendenze del progetto. - Evita di aggiungere Zenzic all'insieme delle dipendenze di produzione sfruttando la cache - di risoluzione di [uv][uv]. - -### GitHub Actions — quality gate documentazione - -```yaml -# .github/workflows/zenzic-scan.yml -name: Qualità documentazione - -on: [push, pull_request] - -jobs: - docs-quality: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Lint documentazione - run: uvx zenzic check all --strict - - - name: Integrità riferimenti + Shield - run: uvx zenzic check references - # Exit 1 su Dangling References - # Exit 2 immediatamente se Shield rileva una credenziale esposta - - - name: Controllo regressione punteggio - run: uvx zenzic diff --threshold 5 -``` - -!!! 
danger "Non sopprimere mai l'Exit Code 2" - Impostare `continue-on-error: true` su uno step che esegue `check references` vanifica - completamente lo Shield. Il codice 2 deve bloccare la pipeline — significa che una - credenziale live è stata trovata nel sorgente della documentazione. - -### Gestione baseline - -```yaml -# Sul branch main — stabilisce o aggiorna il baseline del punteggio -- name: Salva baseline qualità - if: github.ref == 'refs/heads/main' - run: uvx zenzic score --save - -# Su pull request — blocca le regressioni -- name: Controllo regressione punteggio - if: github.event_name == 'pull_request' - run: uvx zenzic diff --threshold 5 -``` - ---- - -## Scegliere tra le modalità - -Le due modalità non sono mutuamente esclusive. Molti progetti lo usano per i pre-commit hook e gli audit locali rapidi, e come gate definitivo in CI. - -| Scenario | Approccio consigliato | -| --- | --- | -| Audit una-tantum, nessuna installazione | `uvx zenzic check all` | -| Sviluppo locale, feedback rapido | `zenzic check all` (installazione globale o di progetto) | -| Pre-commit hook | `uvx zenzic check all` o `uv run zenzic check all` | -| CI: nessuno step di build MkDocs | CLI — `uv run zenzic check all` | -| Tracciare la qualità nel tempo | `zenzic score --save` su main + `zenzic diff` su PR | -| Imporre un punteggio minimo | `zenzic score --fail-under 80` | -| Report senza bloccare (sprint di cleanup) | `zenzic check all --exit-zero` o `fail_on_error: false` | -| Sviluppo locale con anteprima live | `zenzic serve` | -| Validazione link (sempre solo CLI) | `zenzic check links [--strict]` | -| Integrità riferimenti + scansione segreti | `zenzic check references [--strict] [--links]` | -| Rilevare credenziali esposte pre-commit | `zenzic check references` in pre-commit hook | - -Il controllo dei link e il controllo dei riferimenti sono sempre solo CLI. Il parser nativo opera - ---- - -## Utilizzo programmatico - -Il core di Zenzic è prima di tutto una libreria. 
Importa `ReferenceScanner` e `ReferenceMap` -direttamente nei tuoi tool di build, o test suite. - -### Scansione di un singolo file - -```python -from pathlib import Path - -from zenzic.core.scanner import ReferenceScanner -from zenzic.models.references import ReferenceMap - -# Ogni scanner opera su un singolo file Markdown -scanner = ReferenceScanner(Path("docs/api.md")) - -# Pass 1: harvest delle definizioni + esecuzione dello Shield -# Ogni evento è una tupla (lineno, event_type, data) -# event_type in {"DEF", "DUPLICATE_DEF", "IMG", "MISSING_ALT", "SECRET"} -security_findings = [] -for lineno, event, data in scanner.harvest(): - if event == "SECRET": - # Shield attivato — credenziale rilevata prima di qualsiasi richiesta HTTP - security_findings.append(data) - elif event == "DUPLICATE_DEF": - print(f" WARN [{lineno}]: definizione duplicata '{data}' (first wins per CommonMark §4.7)") - -# Pass 2: risolvi gli utilizzi rispetto alla ReferenceMap completamente popolata -# Deve essere chiamato dopo che harvest() è completamente consumato -cross_check_findings = scanner.cross_check() -for finding in cross_check_findings: - print(f" {finding.level}: {finding.message}") - -# Pass 3: report di integrità -report = scanner.get_integrity_report( - cross_check_findings=cross_check_findings, - security_findings=security_findings, -) -print(f"Integrità: {report.integrity_score:.1f}%") -``` - -### Orchestrazione multi-file - -Per scansionare un intero albero di documentazione con deduplicazione URL globale: - -```python -from pathlib import Path - -from zenzic.core.scanner import scan_docs_references_with_links - -reports, link_errors = scan_docs_references_with_links( - repo_root=Path("."), - validate_links=False, # imposta True per pingare ogni URL unico (1 richiesta/URL) -) - -for report in reports: - print(f"{report.file_path}: {report.integrity_score:.1f}%") - -for err in link_errors: - print(f" LINK ERROR: {err}") -``` - -`scan_docs_references_with_links` applica 
automaticamente il contratto Shield-as-firewall: -se il Pass 1 trova segreti in qualsiasi file, solleva `SystemExit(2)` prima che il Pass 2 -venga eseguito su qualsiasi file. - - +**Prossimi passi:** -[arch-two-pass]: ../architecture.md#two-pass-reference-pipeline-v020 -[arch-refmap]: ../architecture.md#gestione-dello-stato-referencemap-tra-i-pass -[arch-dedup]: ../architecture.md#deduplicazione-url-globale-via-linkvalidator -[pre-commit]: https://pre-commit.com/ -[uv]: https://docs.astral.sh/uv/ +- [Riferimento comandi CLI](commands.md) — ogni comando, flag e codice di uscita +- [Funzionalità avanzate](advanced.md) — integrità dei riferimenti, Shield, utilizzo programmatico +- [Integrazione CI/CD](../ci-cd.md) — GitHub Actions, pre-commit hook, gestione del baseline diff --git a/docs/usage/advanced.md b/docs/usage/advanced.md new file mode 100644 index 0000000..e2d6284 --- /dev/null +++ b/docs/usage/advanced.md @@ -0,0 +1,255 @@ +--- +icon: lucide/shield-check +--- + + + + +# Advanced Features + +Deep reference for the Three-Pass Pipeline, Zenzic Shield, accessibility checks, and +programmatic usage from Python. + +--- + +## Reference integrity (v0.2.0) + +`zenzic check references` runs the **Three-Pass Reference Pipeline** — the core engine behind +every reference-quality and security check Zenzic performs. + +### Why three passes? + +Markdown [reference-style links][syntax] separate *where a link points* (the definition) from +*where it appears* (the usage). A single-pass scanner cannot resolve a reference that appears +before its definition. 
Zenzic solves this with a deliberate three-pass structure: + +| Pass | Name | What happens | +| :---: | :--- | :--- | +| 1 | **Harvest** | Stream the file line-by-line; record all `[id]: url` definitions into a `ReferenceMap`; run the Shield on every URL and line | +| 2 | **Cross-Check** | Re-stream the file; for every `[text][id]` usage, look up `id` in the now-complete `ReferenceMap`; flag missing IDs as **Dangling References** | +| 3 | **Integrity Report** | Compute the integrity score; append **Dead Definitions**, duplicate-ID warnings, and alt-text warnings to the findings list | + +Pass 2 only begins when Pass 1 completes without security findings. If the Shield fires during +harvesting, Zenzic exits immediately with code 2 — no reference resolution occurs on files that +contain leaked credentials. + +### What the pipeline catches + +| Issue | Type | Blocks exit? | +| :--- | :---: | :---: | +| **Dangling Reference** — `[text][id]` where `id` has no definition | error | Yes | +| **Dead Definition** — `[id]: url` defined but never used by any link | warning | No (yes with `--strict`) | +| **Duplicate Definition** — same `id` defined twice; first wins (CommonMark §4.7) | warning | No | +| **Missing alt-text** — `![](url)` or `<img>` with blank/absent alt | warning | No | +| **Secret detected** — credential pattern found in a reference URL or line | security | **Exit 2** | + +### Reference Integrity Score + +Each file receives a per-file score: + +```text +Reference Integrity = (resolved definitions / total definitions) × 100 +``` + +A file where every defined reference is used at least once scores 100. Unused (dead) definitions +pull the score down. When a file has no definitions at all, the score is 100 by convention. + +The integrity score is a **per-file diagnostic** — it does not feed into the `zenzic score` +overall quality score. Use it to identify files that accumulate unused reference link +boilerplate.
+ +--- + +## Zenzic Shield + +The Shield runs **inside Pass 1** — every URL extracted from a reference definition is scanned +the moment the harvester encounters it, before any other processing continues. The Shield also +applies a defence-in-depth pass to non-definition lines to catch secrets in plain prose. + +### Detected credential patterns + +| Pattern name | Regex | What it catches | +| :--- | :--- | :--- | +| `openai-api-key` | `sk-[a-zA-Z0-9]{48}` | OpenAI API keys | +| `github-token` | `gh[pousr]_[a-zA-Z0-9]{36}` | GitHub personal/OAuth tokens | +| `aws-access-key` | `AKIA[0-9A-Z]{16}` | AWS IAM access key IDs | +| `stripe-live-key` | `sk_live_[0-9a-zA-Z]{24}` | Stripe live secret keys | +| `slack-token` | `xox[baprs]-[0-9a-zA-Z]{10,48}` | Slack bot/user/app tokens | +| `google-api-key` | `AIza[0-9A-Za-z\-_]{35}` | Google Cloud / Maps API keys | +| `private-key` | `-----BEGIN [A-Z ]+ PRIVATE KEY-----` | PEM private keys (RSA, EC, etc.) | + +### Shield behaviour + +- **Every line is scanned** — including lines inside fenced code blocks (labelled or unlabelled). + A credential committed in a `bash` example is still a committed credential. +- Detection is **non-suppressible** — `--exit-zero`, `exit_zero = true` in `zenzic.toml`, and + `--strict` have no effect on Shield findings. +- Exit code 2 is reserved **exclusively** for Shield events. It is never used for ordinary check + failures. +- Files with security findings are **excluded from link validation** — Zenzic does not ping URLs + that may contain leaked credentials. +- **Code block link isolation** — while the Shield scans inside fenced blocks, the link and + reference validators do not. Example URLs inside code blocks (e.g. `https://api.example.com`) + never produce false-positive link errors. + +!!! danger "If you receive exit code 2" + Treat it as a build-blocking security incident. Rotate the exposed credential immediately, + then remove or replace the offending reference URL. 
Do not commit the secret into history. + +--- + +## Hybrid scanning logic + +Zenzic applies different scanning rules to prose and code blocks because the two contexts have +different risk profiles: + +| Content location | Shield (secrets) | Snippet syntax | Link / ref validation | +| :--- | :---: | :---: | :---: | +| Prose and reference definitions | ✓ | — | ✓ | +| Fenced block — supported language (`python`, `yaml`, `json`, `toml`) | ✓ | ✓ | — | +| Fenced block — unsupported language (`bash`, `javascript`, …) | ✓ | — | — | +| Fenced block — unlabelled (` ``` `) | ✓ | — | — | + +**Why links are excluded from fenced blocks:** documentation examples routinely contain +illustrative URLs (`https://api.example.com/v1/users`) that do not exist as real endpoints. +Checking them would produce hundreds of false positives with no security value. + +**Why secrets are included everywhere:** a credential embedded in a `bash` example is still +a committed secret. It lives in git history, is indexed by code-search tools, and can be +extracted by automated scanners that do not respect Markdown formatting. + +**Why syntax checking is limited to known parsers:** validating Bash or JavaScript would +require third-party parsers or subprocesses, violating the No-Subprocess Pillar. Zenzic +validates what it can validate purely in Python. + +--- + +## Alt-text accessibility + +`zenzic check references` also flags images that lack meaningful alt text: + +- **Markdown inline images** — `![](url)` or `![ ](url)` (blank alt string) +- **HTML `<img>` tags** — `<img>` with no `alt` attribute, or with an `alt` value that contains + only whitespace + +An explicitly empty `alt=""` is treated as intentionally decorative and is **not** flagged. +A completely absent `alt` attribute, or whitespace-only alt text, is flagged as a warning. + +Alt-text findings are warnings — they appear in the report but do not affect the exit code +unless `--strict` is active. 
+ +--- + +## Programmatic usage + +Import Zenzic's scanner functions directly into your own Python tooling. + +### Single-file scan + +Use `ReferenceScanner` to run the three-pass pipeline on one file: + +```python +from pathlib import Path +from zenzic.core.scanner import ReferenceScanner + +scanner = ReferenceScanner(Path("docs/guide.md")) + +# Pass 1 — harvest definitions; collect Shield findings +security_findings = [] +for lineno, event_type, data in scanner.harvest(): + if event_type == "SECRET": + security_findings.append(data) + # In production: raise SystemExit(2) or typer.Exit(2) here + +# Pass 2 — resolve reference links (must be after harvest) +cross_check_findings = scanner.cross_check() + +# Pass 3 — compute integrity score and consolidate all findings +report = scanner.get_integrity_report(cross_check_findings, security_findings) + +print(f"Integrity score: {report.score:.1f}") +for f in report.findings: + level = "WARN" if f.is_warning else "ERROR" + print(f" [{level}] {f.file_path}:{f.line_no} — {f.detail}") +``` + +### Multi-file scan + +Use `scan_docs_references_with_links` to scan every `.md` file in a repository and optionally +validate external URLs: + +```python +from pathlib import Path +from zenzic.core.scanner import scan_docs_references_with_links +from zenzic.models.config import ZenzicConfig + +config, _ = ZenzicConfig.load(Path(".")) + +reports, link_errors = scan_docs_references_with_links( + Path("."), + validate_links=True, # set False to skip HTTP validation + config=config, +) + +for report in reports: + if report.security_findings: + raise SystemExit(2) # your code is responsible for exit-code enforcement + for finding in report.findings: + print(finding) + +for error in link_errors: + print(f"[LINK] {error}") +``` + +`scan_docs_references_with_links` deduplicates external URLs across the entire docs tree before +firing HTTP requests — 50 files linking to the same URL result in exactly one HEAD request. 
+ +### Parallel scan (large repos) + +For repositories with more than ~200 Markdown files, use `scan_docs_references_parallel`: + +```python +from pathlib import Path +from zenzic.core.scanner import scan_docs_references_parallel + +reports = scan_docs_references_parallel(Path("."), workers=4) +``` + +Parallel mode uses `ProcessPoolExecutor`. External URL validation is not available in parallel +mode — use `scan_docs_references_with_links` for sequential scan with link validation. + +--- + +## Fenced-code and frontmatter exclusion + +The harvester and cross-checker both skip content that should never trigger findings: + +- **YAML frontmatter** — the leading `---` block (first line only) is skipped in its entirety, + including any reference-like syntax it might contain. +- **Fenced code blocks** — link and reference validation ignores lines inside ` ``` ` or `~~~` + fences (the Shield still scans them), so URLs in code examples never produce false positives. + +This exclusion is applied consistently in both Pass 1 and Pass 2. + +--- + +## Multi-language documentation + +When your project uses [MkDocs i18n](https://github.com/ultrabug/mkdocs-static-i18n) or +Zensical's locale system, Zenzic adapts automatically: + +- **Locale directories suppressed from orphan detection** — files under `docs/it/`, `docs/fr/`, + etc. are not reported as orphans. The adapter detects locale directories from the engine's + i18n configuration. +- **Cross-locale link resolution** — the MkDocs and Zensical adapters resolve links that cross + locale boundaries (e.g. a link from `docs/it/page.md` to `docs/en/page.md`) without false + positives. +- **Vanilla mode skips orphan check entirely** — when no build-engine config is present, every + file would appear as an orphan. Zenzic skips the check rather than report noise. + +!!! 
tip "Force Vanilla mode to suppress orphan check" + ```bash + zenzic check all --engine vanilla + ``` + +[syntax]: https://spec.commonmark.org/0.31.2/#link-reference-definitions diff --git a/docs/usage/badges.md b/docs/usage/badges.md index 2af8546..22a9dd2 100644 --- a/docs/usage/badges.md +++ b/docs/usage/badges.md @@ -69,7 +69,7 @@ The `--fail-under` flag decouples the exit code from the score value. Teams can `zenzic score --save` writes `.zenzic-score.json` at the repo root: -```json +```text {"project": "zenzic", "score": 100, "threshold": 0, "status": "success", "timestamp": "...", "categories": [...]} ``` diff --git a/docs/usage/commands.md b/docs/usage/commands.md new file mode 100644 index 0000000..790cf18 --- /dev/null +++ b/docs/usage/commands.md @@ -0,0 +1,245 @@ +--- +icon: lucide/terminal +--- + + + + +# CLI Commands + +Complete reference for every Zenzic command, flag, and exit code. + +--- + +## Checks + +```bash +# Individual checks +zenzic check links # Internal links; add --strict for external HTTP validation +zenzic check orphans # Pages on disk missing from nav +zenzic check snippets # Python code blocks that fail to compile +zenzic check placeholders # Stub pages: low word count or forbidden patterns +zenzic check assets # Media files not referenced by any page +zenzic check references # Reference-style links + Zenzic Shield (credential detection) + +# All checks in sequence +zenzic check all # Run all checks +zenzic check all --strict # Also validate external URLs; treat warnings as errors +zenzic check all --format json # Machine-readable output +zenzic check all --exit-zero # Report issues but always exit 0 +zenzic check all --engine mkdocs # Override detected build engine adapter +``` + +### `--strict` flag + +| Command | Effect | +| :--- | :--- | +| `check links --strict` | Validates external HTTP/HTTPS URLs via concurrent network requests | +| `check all --strict` | Validates external URLs + treats warnings as errors | +| `check 
references --strict` | Treats Dead Definitions (unused reference links) as hard errors | +| `score --strict` / `diff --strict` | Runs link check in strict mode | + +You can also set `strict = true` in `zenzic.toml` to make it the permanent default. + +### `--exit-zero` flag + +Always exits with code `0` even when issues are found. All findings are still printed and +scored — only the exit code is suppressed. Useful for observation-only pipelines. + +You can also set `exit_zero = true` in `zenzic.toml` to make it the permanent default. + +--- + +## Autofix & Cleanup + +```bash +zenzic clean assets # Delete unused assets interactively (prompt before each) +zenzic clean assets -y # Delete unused assets immediately (no prompt) +zenzic clean assets --dry-run # Preview what would be deleted without deleting +``` + +`zenzic clean assets` respects `excluded_assets`, `excluded_dirs`, and +`excluded_build_artifacts` from `zenzic.toml` — it will never delete files that match these +patterns. + +--- + +## Development server + +```bash +zenzic serve # Start dev server with pre-flight quality check +zenzic serve --engine mkdocs # Force a specific engine +zenzic serve --engine zensical +zenzic serve --port 9000 # Custom starting port (tries up to 10 consecutive ports) +zenzic serve -p 9000 +zenzic serve --no-preflight # Skip pre-flight and start server immediately +``` + +`zenzic serve` auto-detects the documentation engine from the repository root: + +| Config file present | Engine binary available | Result | +| :--- | :--- | :--- | +| `zensical.toml` | `zensical` or `mkdocs` | Starts available engine | +| `zensical.toml` | neither | Error — install an engine | +| `mkdocs.yml` only | `mkdocs` or `zensical` | Starts available engine | +| `mkdocs.yml` only | neither | Error — install an engine | +| neither | any | Static file server on `site/` (no hot-reload) | + +Before launching the server, Zenzic runs a silent pre-flight check (orphans, snippets, +placeholders, unused assets). 
Issues are printed as warnings but never block startup. External +link validation is intentionally excluded from the pre-flight. Use `--no-preflight` to skip the +quality check entirely when you are mid-fix. + +**Port handling.** Zenzic probes for a free port via socket before launching the engine +subprocess, then passes `--dev-addr 127.0.0.1:{port}` to mkdocs or zensical. The +`Address already in use` error can never appear from the engine. + +--- + +## Exit codes + +| Code | Meaning | +| :---: | :--- | +| `0` | All selected checks passed (or `--exit-zero` was set) | +| `1` | One or more checks reported issues | +| **`2`** | **SECURITY CRITICAL — Zenzic Shield detected a leaked credential** | + +!!! danger "Exit code 2 is reserved for security events" + Exit code 2 is issued exclusively by `zenzic check references` when the Shield detects a + known credential pattern embedded in a reference URL. It is never used for ordinary check + failures. If you receive exit code 2, treat it as a build-blocking security incident and + **rotate the exposed credential immediately**. + +--- + +## JSON output + +Pass `--format json` to `check all` for structured output: + +```bash +zenzic check all --format json | jq '.orphans' +zenzic check all --format json > report.json +``` + +The JSON report contains seven keys: + +```json +{ + "links": [], + "orphans": [], + "snippets": [], + "placeholders": [], + "unused_assets": [], + "references": [], + "nav_contract": [] +} +``` + +Each key holds a list of issue strings or objects. An empty list means the check passed. +`nav_contract` validates `extra.alternate` links in `mkdocs.yml` against the Virtual Site Map +— always empty for non-MkDocs projects. + +--- + +## Engine override + +The `--engine` flag overrides the build engine adapter for a single run without modifying +`zenzic.toml`. 
Accepted by `check orphans` and `check all`: + +```bash +zenzic check orphans --engine mkdocs +zenzic check all --engine zensical +zenzic check all --engine vanilla # disable orphan check regardless of config +``` + +If you pass an engine name with no registered adapter, Zenzic lists available adapters and +exits with code 1: + +```text +ERROR: Unknown engine adapter 'hugo'. +Installed adapters: mkdocs, vanilla, zensical +Install a third-party adapter or choose from the list above. +``` + +Third-party adapters are discovered automatically once installed — no Zenzic update required. +See [Writing an Adapter](../developers/writing-an-adapter.md). + +--- + +## Quality scoring + +Individual checks answer a binary question: pass or fail. `zenzic score` answers a different one: +*how healthy is this documentation, and is it getting better or worse over time?* + +```bash +zenzic score # Compute 0–100 quality score +zenzic score --save # Compute and persist snapshot to .zenzic-score.json +zenzic score --fail-under 80 # Exit 1 if score is below threshold +zenzic score --format json # Machine-readable score report + +zenzic diff # Compare current score against saved snapshot +zenzic diff --threshold 5 # Exit 1 only if score dropped by more than 5 points +zenzic diff --format json # Machine-readable diff report +``` + +### How the score is computed + +Each check category carries a fixed weight that reflects its impact on the reader experience: + +| Category | Weight | Rationale | +| :--- | ---: | :--- | +| links | 35 % | A broken link is an immediate dead end for the reader | +| orphans | 20 % | Unreachable pages are invisible — they might as well not exist | +| snippets | 20 % | Invalid code examples actively mislead developers | +| placeholders | 15 % | Stub content signals an unfinished or abandoned page | +| assets | 10 % | Unused assets are waste, but they do not block the reader | + +Within each category, the score decays linearly: the first issue costs 20 % of the 
category +weight, the second costs another 20 %, floored at zero. A category with five or more issues +contributes nothing to the total. The weighted contributions are summed and rounded to an integer. + +### Regression tracking + +```bash +# On main — establish or refresh the baseline +zenzic score --save + +# On every pull request — block documentation regressions +zenzic diff --threshold 5 +``` + +`--threshold 5` gives contributors a five-point margin. Set it to `0` for a strict gate where +any regression fails the pipeline. + +### Minimum score floor + +```bash +zenzic score --fail-under 80 +``` + +Use this when the team has committed to maintaining a defined quality level, regardless of what +the score was last week. You can also set `fail_under = 80` in `zenzic.toml` to make it +persistent. + +### Soft reporting + +To surface the score without blocking the pipeline: + +```bash +zenzic check all --exit-zero # full report, exit 0 regardless +zenzic score # show score for visibility +``` + +--- + +## `uvx` vs `uv run` vs bare `zenzic` + +| Invocation | Behaviour | When to use | +| :--- | :--- | :--- | +| `uvx zenzic ...` | Downloads and runs in an **isolated, ephemeral** environment | One-off jobs, pre-commit hooks, CI with no project install phase | +| `uv run zenzic ...` | Runs from the **project's virtual environment** (requires `uv sync`) | When Zenzic is in `pyproject.toml` and you need version-pinned behaviour | +| `zenzic ...` (bare) | Requires Zenzic on `$PATH` | Developer machines with a global install | + +!!! tip "CI recommendation" + Prefer `uvx zenzic ...` for CI steps that do not already install project dependencies — it + avoids adding Zenzic to your production dependency set. 
diff --git a/docs/usage/index.md b/docs/usage/index.md index ed109eb..5182571 100644 --- a/docs/usage/index.md +++ b/docs/usage/index.md @@ -5,11 +5,10 @@ icon: lucide/play -# Usage +# Getting Started -Zenzic reads directly from the filesystem and works with any project, including those that do not -use MkDocs as their build driver. Use it in local development, as a pre-commit hook, in CI -pipelines, or for one-off audits. +Zenzic reads directly from the filesystem and works with any Markdown-based project. Use it +in local development, as a pre-commit hook, in CI pipelines, or for one-off audits. !!! tip "Just want to run it now?" @@ -21,121 +20,7 @@ pipelines, or for one-off audits. --- -## Init → Config → Check workflow - -The standard workflow for adopting Zenzic in a project: - -### 1. Init — scaffold a configuration file - -Bootstrap a `zenzic.toml` with a single command. Zenzic auto-detects the documentation engine and -pre-populates `[build_context]` accordingly: - -```bash -zenzic init -``` - -**Example output when `mkdocs.yml` is present:** - -```text -Created zenzic.toml - Engine pre-set to mkdocs (detected from mkdocs.yml). - -Edit the file to enable rules, adjust directories, or set a quality threshold. -Run zenzic check all to validate your documentation. -``` - -If no engine config file is found, `zenzic init` produces an engine-agnostic scaffold (Vanilla -mode). In either case, all settings are commented out by default — uncomment and adjust only the -fields you need. - -Run Zenzic without a `zenzic.toml` and it falls back to built-in defaults, printing a Helpful -Hint panel that suggests `zenzic init`: - -```text -╭─ 💡 Zenzic Tip ─────────────────────────────────────────────────────╮ -│ Using built-in defaults — no zenzic.toml found. │ -│ Run zenzic init to create a project configuration file. │ -│ Customise docs directory, excluded paths, engine adapter, and rules. 
│ -╰──────────────────────────────────────────────────────────────────────╯ -``` - -### 2. Config — tune to your project - -Edit the generated `zenzic.toml` to suppress noise and set thresholds appropriate to your project: - -```toml -# zenzic.toml — place at the repository root -excluded_assets = [ - "assets/favicon.svg", # referenced by mkdocs.yml, not by any .md page - "assets/social-preview.png", -] -placeholder_max_words = 30 # technical reference pages are intentionally brief -fail_under = 70 # establish an initial quality floor -``` - -See the [Configuration Reference](../configuration/index.md) for the full field list. - -### 3. Check — run continuously - -With the baseline established, run Zenzic on every commit and pull request: - -```bash -# Pre-commit hook or CI step -zenzic check all --strict - -# Save a quality baseline on main -zenzic score --save - -# Block PRs that regress the baseline -zenzic diff --threshold 5 -``` - ---- - -## Vanilla mode vs engine-aware mode - -Zenzic operates in one of two modes depending on whether it can discover a build-engine -configuration file: - -### Engine-aware mode - -When `mkdocs.yml` (MkDocs/Zensical) or `zensical.toml` (Zensical) is present at the repository -root, Zenzic loads the corresponding **adapter** which provides: - -- **Nav awareness** — the full navigation tree is known, so orphan detection can tell the - difference between "this file is not in the nav" and "this file is not supposed to be in the - nav" (e.g. i18n locale files). -- **i18n fallback** — cross-locale links are resolved correctly instead of being flagged as - broken. -- **Locale directory suppression** — files under `docs/it/`, `docs/fr/`, etc. are not reported - as orphans. - -This is the mode used by the vast majority of Zenzic users. - -### Vanilla mode - -When no build-engine configuration is found — or when an unknown engine name is specified — Zenzic -falls back to `VanillaAdapter`. 
In this mode: - -- **Orphan check is skipped.** Without a nav declaration, every Markdown file would appear to be - an orphan, which would produce useless noise rather than actionable findings. -- **All other checks run normally** — links, snippets, placeholders, assets, and references are - all validated as usual. - -Vanilla mode is the right choice for plain Markdown wikis, GitHub-wiki repos, or any project -where navigation is implicit rather than declared. - -!!! tip "Force a specific mode" - Use `--engine` to override the detected adapter for a single run: - - ```bash - zenzic check all --engine vanilla # skip orphan check regardless of config files - zenzic check all --engine mkdocs # force MkDocs adapter - ``` - ---- - -## Installation options +## Install ### Ephemeral — no installation required @@ -146,8 +31,8 @@ where navigation is implicit rather than declared. ``` `uvx` resolves and runs Zenzic from PyPI in a throwaway environment. Nothing is installed on - your system. This is the right choice for one-off audits, `git hooks`, and CI jobs where you - want to avoid pinning a dev dependency. + your system. The right choice for one-off audits, `git hooks`, and CI jobs where you want to + avoid pinning a dev dependency. === ":simple-pypi: pip" @@ -169,8 +54,7 @@ where navigation is implicit rather than declared. ``` Install once, use in any project. The binary is available on your `PATH` without activating - a virtual environment. Suitable for developers who work across multiple documentation projects - and want a consistent global tool. + a virtual environment. === ":simple-pypi: pip" @@ -192,8 +76,8 @@ where navigation is implicit rather than declared. ``` Installs Zenzic into the project's virtual environment and pins the version in `uv.lock`. - This is the right choice for team projects where everyone must use the same version, and for - CI pipelines that install project dependencies before running checks. 
+ The right choice for team projects where everyone must use the same version, and for CI + pipelines that install project dependencies before running checks. === ":simple-pypi: pip" @@ -206,537 +90,151 @@ where navigation is implicit rather than declared. Standard dev-dependency pattern with a project-local virtual environment. -### Commands - -```bash -# Individual checks -zenzic check links # Internal links; add --strict for external HTTP validation -zenzic check orphans # Pages on disk missing from nav -zenzic check snippets # Python code blocks that fail to compile -zenzic check placeholders # Stub pages: low word count or forbidden patterns -zenzic check assets # Media files not referenced by any page - -# Autofix & Cleanup -zenzic clean assets # Delete unused assets interactively -zenzic clean assets -y # Delete unused assets immediately -zenzic clean assets --dry-run # Preview what would be deleted - -# Reference pipeline (v0.2.0) -zenzic check references # Harvest → Cross-Check → Shield → Integrity score -zenzic check references --strict # Treat Dead Definitions as errors -zenzic check references --links # Also validate reference URLs via async HTTP - -# All checks in sequence -zenzic check all # Run all six checks -zenzic check all --strict # Treat warnings as errors -zenzic check all --format json # Machine-readable output for downstream processing -zenzic check all --exit-zero # Report issues but always exit 0 - -# Quality score -zenzic score # Compute 0–100 quality score -zenzic score --save # Compute and persist snapshot to .zenzic-score.json -zenzic score --fail-under 80 # Exit 1 if score is below threshold -zenzic score --format json # Machine-readable score report - -# Regression detection -zenzic diff # Compare current score against saved snapshot -zenzic diff --threshold 5 # Exit 1 only if score dropped by more than 5 points -zenzic diff --format json # Machine-readable diff report -``` - -### Autofix & Cleanup - -Instead of just reporting 
issues, Zenzic can actively clean your repository. `zenzic clean assets` reads your documentation, finds all unused files in `docs_dir` (respecting `excluded_assets`, `excluded_dirs`, and `excluded_build_artifacts`), and prompts you to safely delete them. Use `--dry-run` to preview changes safely or `-y` to automate deletion in CI pipelines. - -### Development server - -```bash -# Start dev server with pre-flight quality check -zenzic serve - -# Force a specific engine -zenzic serve --engine mkdocs -zenzic serve --engine zensical - -# Custom port (scans up to 10 consecutive ports if busy) -zenzic serve --port 9000 -zenzic serve -p 9000 - -# Skip pre-flight and jump straight to the server -zenzic serve --no-preflight -``` - -`zenzic serve` auto-detects the documentation engine from the repository root: - -| Config file present | Engine binary available | Result | -| :--- | :--- | :--- | -| `zensical.toml` | `zensical` or `mkdocs` | Starts available engine | -| `zensical.toml` | neither | Error — install an engine | -| `mkdocs.yml` only | `mkdocs` or `zensical` | Starts available engine | -| `mkdocs.yml` only | neither | Error — install an engine | -| neither | any | Static file server on `site/` (no hot-reload) | - -`zensical.toml` always takes priority because Zensical is a superset of MkDocs and reads -`mkdocs.yml` natively. The static fallback lets `zenzic serve` work in any environment — even -without mkdocs or zensical installed — as long as a pre-built `site/` directory exists. +### MkDocs validation — `zenzic[docs]` extra -When `--engine` is specified explicitly, Zenzic validates both that the binary is on `$PATH` and -that the required config file exists. `--engine zensical` accepts `mkdocs.yml` as a valid config -for backwards compatibility. +Zenzic's core engine is dependency-free by design: validating links, snippets, references, +and the Shield requires nothing beyond `zenzic` itself. The MkDocs stack (Material theme, +plugins, etc.) 
is only needed to **render** your documentation — not to lint it. -**Port handling.** Zenzic resolves a free port via socket probing *before* launching the engine -subprocess, then passes `--dev-addr 127.0.0.1:{port}` to mkdocs or zensical. This means the -`Address already in use` error can never appear from the engine; if the requested port (default -`8000`) is busy, Zenzic silently tries the next port up to ten times and reports which port is -actually used. +If your project uses `mkdocs.yml` and you want to validate it as part of the Zenzic checks, +install the optional extra: -Before launching the server, Zenzic runs a silent pre-flight check — orphans, snippets, -placeholders, and unused assets. Issues are printed as warnings but never block startup; the intent -is to make them visible while you iterate. External link validation (`check links --strict`) is -intentionally excluded from the pre-flight: there is no value in waiting for network roundtrips -when you are about to fix the documentation live. - -The server process inherits your terminal so hot-reload logs and request output appear unfiltered. -Use `--no-preflight` to skip the quality check entirely when you are mid-fix and do not need the -noise. - -### Exit codes - -| Code | Meaning | -| :---: | :--- | -| `0` | All selected checks passed (or `--exit-zero` was set) | -| `1` | One or more checks reported issues | -| **`2`** | **SECURITY CRITICAL — Zenzic Shield detected a leaked credential** | - -!!! danger "Exit code 2 is reserved for security events" - Exit code 2 is issued exclusively by `zenzic check references` when the Shield detects a - known credential pattern embedded in a reference URL. It is never used for ordinary check - failures. If you receive exit code 2, treat it as a build-blocking security incident and - **rotate the exposed credential immediately**. 
- -### JSON output - -Pass `--format json` to `check all` for structured output suitable for downstream processing, -dashboards, or custom reporting tools: - -```bash -zenzic check all --format json | jq '.orphans' -zenzic check all --format json > report.json -``` - -The JSON report contains keys matching each check name: `links`, `orphans`, `snippets`, -`placeholders`, `unused_assets`, `references`. Each key holds a list of issue strings or objects. -An empty list means the check passed. - -### Overriding the adapter engine +=== ":simple-astral: uv" -The `--engine` flag overrides the build engine adapter for a single run without modifying -`zenzic.toml`. It is accepted by `check orphans` and `check all`: + ```bash + # Add the [docs] extra alongside Zenzic + uv add --dev "zenzic[docs]" -```bash -# Force the MkDocs adapter even if zenzic.toml says otherwise -zenzic check orphans --engine mkdocs -zenzic check all --engine mkdocs + # Or as an ephemeral run: + uvx "zenzic[docs]" check all + ``` -# Use the Zensical adapter (requires zensical.toml to be present) -zenzic check orphans --engine zensical -zenzic check all --engine zensical -``` +=== ":simple-pypi: pip" -If you pass an engine name that has no registered adapter, Zenzic lists the available adapters -and exits with code 1: + ```bash + pip install "zenzic[docs]" + ``` -```text -ERROR: Unknown engine adapter 'hugo'. -Installed adapters: mkdocs, vanilla, zensical -Install a third-party adapter or choose from the list above. -``` +The `[docs]` extra installs `mkdocs-material`, `mkdocstrings`, `mkdocs-minify-plugin`, and +`mkdocs-static-i18n` — the same stack used to build Zenzic's own documentation site. If you +**only** run `zenzic check all` without rendering the site, skip the extra entirely. -Third-party adapters (e.g. `zenzic-hugo-adapter`) are discovered automatically once installed — -no Zenzic update required. See [Writing an Adapter](../developers/writing-an-adapter.md). +!!! 
note "Hugo, Zensical, and other engines" + The `[docs]` extra is specific to MkDocs. For Zensical and other engine adapters, install + the corresponding third-party adapter package (e.g. `pip install zenzic-hugo-adapter`). + No extra is required for `VanillaAdapter` (plain Markdown folders). --- -### Quality scoring - -Individual checks answer a binary question: pass or fail. `zenzic score` answers a different one: -*how healthy is this documentation, and is it getting better or worse over time?* - -`zenzic score` runs all checks and aggregates their results into a single integer between 0 and -100. The score is deterministic — given the same documentation state, it always produces the same -number — which makes it safe to track in version control and compare across branches. - -### How the score is computed - -Each check category carries a fixed weight that reflects its impact on the reader experience: - -| Category | Weight | Rationale | -| :--- | ---: | :--- | -| links | 35 % | A broken link is an immediate dead end for the reader | -| orphans | 20 % | Unreachable pages are invisible — they might as well not exist | -| snippets | 20 % | Invalid code examples actively mislead developers | -| placeholders | 15 % | Stub content signals an unfinished or abandoned page | -| assets | 10 % | Unused assets are waste, but they do not block the reader | - -Within each category, the score decays linearly: the first issue costs 20 points out of 100 for -that category, the second costs another 20, and so on, floored at zero. A category with five or -more issues contributes nothing to the total. The weighted contributions are summed and rounded -to an integer. - -This means a single broken link drops the total score by roughly 7 points (35 % weight × 20 % -decay), while a single unused asset costs about 2 points. The weights encode an intentional -judgement about severity. 
- -### Tracking regressions in CI - -The score becomes most useful when compared against a known baseline. The `--save` flag writes the -current report to `.zenzic-score.json` at the repository root. Once a baseline exists, -`zenzic diff` computes the delta and exits non-zero if the documentation has regressed. - -A typical CI setup on a team project: - -```bash -# Establish or refresh the baseline on the main branch -zenzic score --save - -# On every pull request, block merges that degrade documentation quality -zenzic diff --threshold 5 -``` - -`--threshold 5` gives contributors a five-point margin — small, unrelated changes (a new stub -page, a temporary TODO comment) do not block a PR. Set it to `0` for a strict gate where any -regression fails the pipeline. - -### Enforcing a minimum score - -Use `--fail-under` when you want an absolute floor rather than a relative check: - -```bash -zenzic score --fail-under 80 -``` +## Init → Config → Check workflow -This is useful for documentation-as-a-feature policies where the team has committed to maintaining -a defined quality level, regardless of what the score was last week. +The standard workflow for adopting Zenzic in a project: -### Soft reporting +### 1. Init — scaffold a configuration file -To surface the score without blocking the pipeline — useful during an active documentation -improvement sprint — combine `check all --exit-zero` with `score` in separate steps: +Bootstrap a `zenzic.toml` with a single command. Zenzic auto-detects the documentation engine and +pre-populates `[build_context]` accordingly: ```bash -zenzic check all --exit-zero # full report, exit 0 regardless -zenzic score # show score for visibility +zenzic init ``` ---- - -## Reference integrity (v0.2.0) - -`zenzic check references` is the most thorough check in the suite. 
Unlike the other checks, which -operate on individual pages in isolation, the reference pipeline builds a **global view** of all -Markdown reference link definitions across your entire documentation before validating any usage. - -### Why two passes? - -A single-pass scanner would produce false positives for *forward references* — cases where -`[text][id]` appears on a page before `[id]: url` is defined later in the same file. The -[Two-Pass Pipeline][arch-two-pass] solves this cleanly: - -- **Pass 1 — Harvest**: reads every file, collects all `[id]: url` definitions into a - per-file [ReferenceMap][arch-refmap], and runs the Zenzic Shield on every URL. -- **Pass 2 — Cross-Check**: resolves every `[text][id]` usage against the fully-populated - ReferenceMap and reports any Dangling References. -- **Pass 3 — Integrity**: computes the per-file integrity score from the resolved usage data. - -!!! warning "Do not merge the passes" - Merging harvesting and cross-check into a single loop produces false *Phantom Reference* errors - on forward references — a common pattern in large documentation projects. The two-pass - separation is not an optimisation; it is a correctness requirement. +**Example output when `mkdocs.yml` is present:** -### Commands +```text +Created zenzic.toml + Engine pre-set to mkdocs (detected from mkdocs.yml). -```bash -zenzic check references # Full pipeline: Harvest → Cross-Check → Shield → score -zenzic check references --strict # Treat Dead Definitions (defined but never used) as errors -zenzic check references --links # Also validate reference URLs via async HTTP (1 ping/URL) +Edit the file to enable rules, adjust directories, or set a quality threshold. +Run zenzic check all to validate your documentation. ``` -`--links` triggers [global URL deduplication][arch-dedup]: every unique URL across all files is -pinged exactly once, regardless of how many definitions reference it. - -### Zenzic Shield - -!!! 
danger "Security — Exit Code 2" - If `zenzic check references` exits with code **2**, a secret was found embedded in a reference - URL inside your documentation. **Rotate the exposed credential immediately.** - -The Shield scans every reference URL for known credential patterns during Pass 1 — before Pass 2 -validates links, and before `--links` issues any HTTP request. A document containing a leaked -credential is never used to make outbound requests. - -| Credential type | Pattern | -| :--- | :--- | -| OpenAI API key | `sk-[a-zA-Z0-9]{48}` | -| GitHub token | `gh[pousr]_[a-zA-Z0-9]{36}` | -| AWS access key | `AKIA[0-9A-Z]{16}` | - -Patterns use exact-length quantifiers — no backtracking, O(1) per line regardless of line length. +If no engine config file is found, `zenzic init` produces an engine-agnostic scaffold (Vanilla +mode). In either case, all settings are commented out by default — uncomment and adjust only the +fields you need. -When the Shield fires, Zenzic emits a SECURITY CRITICAL banner and exits immediately with code 2: +Run Zenzic without a `zenzic.toml` and it falls back to built-in defaults, printing a Helpful +Hint panel that suggests `zenzic init`: ```text -╔══════════════════════════════════════╗ -║ SECURITY CRITICAL ║ -║ Secret(s) detected in documentation ║ -╚══════════════════════════════════════╝ - - [SHIELD] docs/api.md:12 — openai-api-key detected in URL - https://api.example.com/?key=sk-AbCdEfGhIj... - -Build aborted. Rotate the exposed credential immediately. +╭─ 💡 Zenzic Tip ─────────────────────────────────────────────────────╮ +│ Using built-in defaults — no zenzic.toml found. │ +│ Run zenzic init to create a project configuration file. │ +│ Customise docs directory, excluded paths, engine adapter, and rules. │ +╰──────────────────────────────────────────────────────────────────────╯ ``` -!!! 
tip "Pre-commit integration" - Add `zenzic check references` to your [pre-commit hooks][pre-commit] to catch leaked - credentials before they are ever committed to version control. - -### Integrity score +### 2. Config — tune to your project -Each file receives a per-file **integrity score** (0–100): the ratio of *used* reference -definitions to *total* definitions. A score of 100 means every definition is referenced at least -once; lower scores indicate Dead Definitions that may be dead weight or copy-paste residue. +Edit the generated `zenzic.toml` to suppress noise and set thresholds appropriate to your project: -```text -File integrity: docs/api.md — 100.0 (8/8 definitions used) -File integrity: docs/index.md — 75.0 (3/4 definitions used) -File integrity: docs/roadmap.md — 50.0 (2/4 definitions used) +```toml +# zenzic.toml — place at the repository root +excluded_assets = [ + "assets/favicon.svg", # referenced by mkdocs.yml, not by any .md page + "assets/social-preview.png", +] +placeholder_max_words = 30 # technical reference pages are intentionally brief +fail_under = 70 # establish an initial quality floor ``` -Use `--strict` to treat Dead Definitions as hard errors and fail the pipeline when any file -scores below 100. - -The formula is deterministic — no weights, no thresholds, just the ratio of Resolved References -to total Reference Definitions: +See the [Configuration Reference](../configuration/index.md) for the full field list. -$$ -Reference\ Integrity = \frac{Resolved\ References}{Total\ Reference\ Definitions} -$$ +### 3. 
Check — run continuously -### Alt-text accessibility +With the baseline established, run Zenzic on every commit and pull request: -The reference pipeline also emits `WARNING` findings for images without alt text: +```bash +# Pre-commit hook or CI step +zenzic check all --strict -```markdown - -![](docs/assets/screenshot.png) +# Save a quality baseline on main +zenzic score --save - -![Zenzic CLI output showing pass/fail summary](docs/assets/screenshot.png) +# Block PRs that regress the baseline +zenzic diff --threshold 5 ``` -These are warnings, not errors. They do not affect the exit code unless `--strict` is set. - --- -## CI/CD integration - -Zenzic is designed for pipeline-first workflows. All commands exit non-zero on failure — no -wrappers required. The examples below use GitHub Actions and [`uv`][uv]; the patterns apply to -any CI provider. - -### `uvx` vs `uv run` vs bare `zenzic` - -| Invocation | Behaviour | When to use | -| :--- | :--- | :--- | -| `uvx zenzic ...` | Downloads and runs Zenzic in an **isolated, ephemeral** environment | One-off jobs, pre-commit hooks, CI steps with no project install phase | -| `uv run zenzic ...` | Runs Zenzic from the **project's virtual environment** (requires `uv sync` first) | When Zenzic is in `pyproject.toml` and you need version-pinned behaviour | -| `zenzic ...` (bare) | Requires Zenzic on `$PATH` (after `uv tool install` or `pip install`) | Developer machines with a persistent global install | - -!!! tip "CI recommendation" - Prefer `uvx zenzic ...` for CI steps that do not already install project dependencies. - It avoids adding Zenzic to your production dependency set while still benefiting from - [uv][uv]'s dependency cache. 
- -### GitHub Actions — documentation quality gate - -```yaml -# .github/workflows/zenzic-scan.yml -name: Documentation quality - -on: [push, pull_request] - -jobs: - docs-quality: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Lint documentation - run: uvx zenzic check all --strict - # Exit 1 on any check failure - - - name: Reference integrity + Shield - run: uvx zenzic check references - # Exit 1 on Dangling References - # Exit 2 immediately if Shield detects a leaked credential - - - name: Score regression check - run: uvx zenzic diff --threshold 5 - # Exit 1 if score dropped more than 5 points vs the saved baseline -``` - -### Handling Exit Code 2 in CI +## Engine modes -```yaml -- name: Reference integrity + Shield - id: shield - run: uvx zenzic check references - # Do NOT set continue-on-error: true — a Shield failure is a security event +Zenzic operates in one of two modes depending on whether it can discover a build-engine +configuration file: -- name: Shield failure annotation - if: failure() && steps.shield.outcome == 'failure' - run: | - echo "::error::Zenzic Shield triggered. Rotate exposed credentials before re-running." -``` +### Engine-aware mode -!!! danger "Never suppress Exit Code 2" - Setting `continue-on-error: true` on a step that runs `check references` defeats the - Shield entirely. Exit code 2 must block the pipeline — it means a live credential was found - in your documentation source. +When `mkdocs.yml` (MkDocs/Zensical) or `zensical.toml` (Zensical) is present at the repository +root, Zenzic loads the corresponding **adapter** which provides: -### Baseline management +- **Nav awareness** — orphan detection knows the difference between "not in the nav" and "not + supposed to be in the nav" (e.g. i18n locale files). +- **i18n fallback** — cross-locale links are resolved correctly instead of being flagged as broken. +- **Locale directory suppression** — files under `docs/it/`, `docs/fr/`, etc. 
are not reported + as orphans. -```yaml -# On the main branch — establish or refresh the score baseline -- name: Save quality baseline - if: github.ref == 'refs/heads/main' - run: uvx zenzic score --save - # Writes .zenzic-score.json — commit this file to version control +### Vanilla mode -# On pull requests — block regressions -- name: Score regression check - if: github.event_name == 'pull_request' - run: uvx zenzic diff --threshold 5 -``` +When no build-engine configuration is found, Zenzic falls back to `VanillaAdapter`. In this mode: -### Pre-commit hooks - -```yaml -# .pre-commit-config.yaml -repos: - - repo: local - hooks: - - id: zenzic-references - name: Zenzic Shield + reference integrity - language: system - entry: uvx zenzic check references - types: [markdown] - pass_filenames: false -``` +- **Orphan check is skipped.** Without a nav declaration, every file would appear to be an orphan. +- **All other checks run normally** — links, snippets, placeholders, assets, and references. -!!! tip - Because the Shield runs in Pass 1 — before any URL is validated or any HTTP request is - issued — this hook catches leaked credentials before they are ever committed, not just - before they are pushed. +Vanilla mode is the right choice for plain Markdown wikis, GitHub-wiki repos, or any project +where navigation is implicit rather than declared. ---- +!!! tip "Force a specific mode" + Use `--engine` to override the detected adapter for a single run: -## Choosing between modes - -The two modes are not mutually exclusive. Many projects use both: the CLI for pre-commit hooks and -quick local audits, and as the definitive gate in CI. 
- -| Scenario | Recommended approach | -| --- | --- | -| One-off audit, no install | `uvx zenzic check all` | -| Local development, quick feedback | `zenzic check all` (global or project install) | -| Pre-commit hook | `uvx zenzic check all` or `uv run zenzic check all` | -| CI: no MkDocs build step | CLI — `uv run zenzic check all` | -| Track quality over time | `zenzic score --save` on main + `zenzic diff` on PRs | -| Enforce a minimum quality floor | `zenzic score --fail-under 80` | -| Report without blocking (cleanup sprint) | `zenzic check all --exit-zero` or `fail_on_error: false` | -| Local development with live preview | `zenzic serve` | -| Link validation (always CLI only) | `zenzic check links [--strict]` | -| Reference integrity + secret scanning | `zenzic check references [--strict] [--links]` | -| Catch leaked credentials pre-commit | `zenzic check references` in pre-commit hook | - -The link check and reference check are always CLI-only. The native link extractor and reference -— `zenzic check links --strict` and `zenzic check references` are the recommended way to validate -links and reference integrity in CI. + ```bash + zenzic check all --engine vanilla # skip orphan check regardless of config files + zenzic check all --engine mkdocs # force MkDocs adapter + ``` --- -## Programmatic usage - -Zenzic's core is a library first. Import `ReferenceScanner` and `ReferenceMap` directly into -your build tools or test suites. 
- -### Single-file scan - -```python -from pathlib import Path - -from zenzic.core.scanner import ReferenceScanner -from zenzic.models.references import ReferenceMap - -# Each scanner operates on a single Markdown file -scanner = ReferenceScanner(Path("docs/api.md")) - -# Pass 1: harvest definitions + run the Shield -# Each event is a (lineno, event_type, data) tuple -# event_type in {"DEF", "DUPLICATE_DEF", "IMG", "MISSING_ALT", "SECRET"} -security_findings = [] -for lineno, event, data in scanner.harvest(): - if event == "SECRET": - # Shield fired — credential detected before any HTTP request is issued - security_findings.append(data) - elif event == "DUPLICATE_DEF": - print(f" WARN [{lineno}]: duplicate definition '{data}' (first wins per CommonMark §4.7)") - -# Pass 2: resolve usages against the fully-populated ReferenceMap -# Must be called after harvest() completes — never interleave the passes -cross_check_findings = scanner.cross_check() -for finding in cross_check_findings: - print(f" {finding.level}: {finding.message}") - -# Pass 3: integrity report -report = scanner.get_integrity_report( - cross_check_findings=cross_check_findings, - security_findings=security_findings, -) -print(f"Integrity: {report.integrity_score:.1f}%") -``` - -### Multi-file orchestration - -For scanning an entire documentation tree with global URL deduplication, use the high-level -orchestrator: - -```python -from pathlib import Path - -from zenzic.core.scanner import scan_docs_references_with_links - -reports, link_errors = scan_docs_references_with_links( - repo_root=Path("."), - validate_links=False, # set True to also ping every unique reference URL (1 request/URL) -) - -for report in reports: - print(f"{report.file_path}: {report.integrity_score:.1f}%") - -for err in link_errors: - print(f" LINK ERROR: {err}") -``` - -`scan_docs_references_with_links` enforces the Shield-as-firewall contract automatically: -if Pass 1 finds any secrets in any file, it raises `SystemExit(2)` 
before Pass 2 runs -on any file. No URL in a document containing a leaked credential is ever pinged. - - +**Next steps:** -[arch-two-pass]: ../architecture.md#two-pass-reference-pipeline-v020 -[arch-refmap]: ../architecture.md#referencemap-state-management-between-passes -[arch-dedup]: ../architecture.md#global-url-deduplication-via-linkvalidator -[pre-commit]: https://pre-commit.com/ -[uv]: https://docs.astral.sh/uv/ +- [CLI Commands reference](commands.md) — every command, flag, and exit code +- [Advanced features](advanced.md) — Reference integrity, Shield, programmatic usage +- [CI/CD Integration](../ci-cd.md) — GitHub Actions, pre-commit hooks, baseline management diff --git a/examples/broken-docs/README.md b/examples/broken-docs/README.md new file mode 100644 index 0000000..957ff9e --- /dev/null +++ b/examples/broken-docs/README.md @@ -0,0 +1,42 @@ + + + +# broken-docs — Intentional Failures Fixture + +This example intentionally triggers every Zenzic check. It exists to demonstrate +what failures look like and to serve as a regression fixture for the check engine. 
+ +## What it demonstrates + +| Check | Trigger | +| --- | --- | +| Links — missing file | `non-existent.md` does not exist | +| Links — dead anchor | `#non-existent-section` not in any page | +| Links — path traversal | `../../../../etc/passwd` escapes `docs/` | +| Links — absolute path | `/assets/logo.png` is not a relative path | +| Links — broken i18n | `missing.it.md` does not exist on disk | +| Orphans | `api.md` exists on disk but is absent from `nav` | +| Snippets | `tutorial.md` contains a Python block with a `SyntaxError` | +| Placeholders | `api.md` has only 18 words and a task marker | +| Assets | `assets/unused.png` is on disk but never referenced | +| Custom rules | `[[custom_rules]]` in `zenzic.toml`: `ZZ-NOFIXME` pattern | + +## Run it + +```bash +cd examples/broken-docs + +# See all failures +zenzic check all + +# Suppress non-zero exit (useful in CI soft-gate mode) +zenzic check all --exit-zero +``` + +Expected exit code: **1** (check failures; no Shield events). + +## Engine + +Uses `engine = "mkdocs"`. The `mkdocs.yml` intentionally omits `api.md` from +the nav to trigger the orphan check. The `zensical.toml` provides an alternative +native-engine config demonstrating the same orphan trigger. diff --git a/examples/broken-docs/docs/index.md b/examples/broken-docs/docs/index.md index 1b67691..4d84d94 100644 --- a/examples/broken-docs/docs/index.md +++ b/examples/broken-docs/docs/index.md @@ -20,7 +20,7 @@ Run `zenzic check all --strict` from this directory to see the full report. 
| Links — **broken i18n** | [Italian page that does not exist](missing.it.md) | | Orphans | `api.md` exists on disk but is absent from `nav` | | Snippets | `tutorial.md` contains a Python block with a `SyntaxError` | -| Placeholders | `api.md` is a stub (fewer than 50 words, contains "TODO") | +| Placeholders | `api.md` has only 18 words and a bare task marker — see the file | | Assets | `assets/unused.png` is on disk but never referenced | The path traversal link above (`../../../../etc/passwd`) demonstrates the **Zenzic Shield**: @@ -35,4 +35,6 @@ The broken i18n link (`missing.it.md`) demonstrates cross-locale link validation checks that the target translation file actually exists on disk, even when i18n fallback is active. +![Used image](assets/used.png) + See the [Tutorial](tutorial.md) to continue. diff --git a/examples/broken-docs/docs/tutorial.md b/examples/broken-docs/docs/tutorial.md index 72282f1..b931e10 100644 --- a/examples/broken-docs/docs/tutorial.md +++ b/examples/broken-docs/docs/tutorial.md @@ -4,8 +4,8 @@ # Tutorial This tutorial page is intentionally well-formed: it has enough content to pass the placeholder -check and does not contain any forbidden stub patterns. The only issue here is the invalid Python -code block below. +check and does not contain any forbidden marker patterns. The only issue here is the invalid +Python code block below. ## Installation diff --git a/examples/broken-docs/zenzic.toml b/examples/broken-docs/zenzic.toml index 3280778..99e7b7e 100644 --- a/examples/broken-docs/zenzic.toml +++ b/examples/broken-docs/zenzic.toml @@ -1,8 +1,22 @@ # SPDX-FileCopyrightText: 2026 PythonWoods # SPDX-License-Identifier: Apache-2.0 # zenzic.toml — CLI configuration for the broken-docs example. -# api.md is intentionally absent from the nav (see mkdocs.yml) to trigger the orphan check. +# +# This example intentionally triggers every Zenzic check except nav-contract +# and references. Run `zenzic check all` to see the full failure report. 
+# Run `zenzic check all --exit-zero` to suppress the non-zero exit code. +# +# api.md is intentionally absent from the nav (see mkdocs.yml) to trigger +# the orphan check. +engine = "mkdocs" docs_dir = "docs" placeholder_max_words = 50 placeholder_patterns = ["TODO", "coming soon", "stub"] + +# Demonstrates the [[custom_rules]] DSL: flag any remaining FIXME markers. +[[custom_rules]] +id = "ZZ-NOFIXME" +pattern = "(?i)\\bFIXME\\b" +message = "FIXME markers must be resolved before publishing." +severity = "error" diff --git a/examples/i18n-standard/README.md b/examples/i18n-standard/README.md new file mode 100644 index 0000000..00929f6 --- /dev/null +++ b/examples/i18n-standard/README.md @@ -0,0 +1,46 @@ + + + +# i18n-standard — Gold Standard Bilingual Project + +This example demonstrates a perfectly structured bilingual documentation project +that scores **100/100** under `zenzic check all --strict`. + +## What it demonstrates + +- **Suffix-mode i18n**: translations live as `page.it.md` siblings alongside + `page.md` — never in a separate `docs/it/` subtree +- **Path symmetry**: a link `../../assets/brand-kit.zip` resolves identically + from both `page.md` and `page.it.md` +- **Build artifact exclusion**: `manual.pdf` and `brand-kit.zip` are listed in + `excluded_build_artifacts` — links to them are validated structurally without + requiring the files to exist on disk +- **`fail_under = 100`**: enforces a perfect score; any regression fails the gate + +## Run it + +```bash +cd examples/i18n-standard +zenzic check all --strict +``` + +Expected exit code: **0** — `SUCCESS: All checks passed.` + +## Structure + +```text +docs/ + index.md / index.it.md ← home page (EN + IT) + guides/ + index.md / index.it.md ← guides section + advanced/ + setup.md / setup.it.md ← deep nesting, ../../ relative links + tuning.md / tuning.it.md + reference/ + api.md / api.it.md ← API reference +``` + +## Engine + +Uses `engine = "mkdocs"` with the MkDocs `i18n` plugin in `docs_structure: 
suffix` mode. +`locales = ["it"]` and `default_locale = "en"` are declared in `zenzic.toml`. diff --git a/examples/i18n-standard/docs/index.it.md b/examples/i18n-standard/docs/index.it.md index 8a2b894..5a3c292 100644 --- a/examples/i18n-standard/docs/index.it.md +++ b/examples/i18n-standard/docs/index.it.md @@ -14,7 +14,7 @@ di documentazione bilingue perfetta che ottiene **100/100** con `zenzic check al | **Zero link assoluti** | Ogni link interno è relativo (`../`, `./`) | | **Simmetria dei percorsi** | Un link `../assets/brand-kit.zip` si risolve identicamente da `.md` e `.it.md` | | **Integrità degli asset** | Tutti gli asset referenziati esistono su disco; nessun file ireferenziato | -| **Nessun segnaposto** | Ogni pagina ha contenuto reale, nessun stub TODO | +| **Nessun segnaposto** | Ogni pagina ha contenuto reale, nessun marcatore incompleto | ## Esplora la struttura diff --git a/examples/i18n-standard/docs/index.md b/examples/i18n-standard/docs/index.md index c153b86..758b74c 100644 --- a/examples/i18n-standard/docs/index.md +++ b/examples/i18n-standard/docs/index.md @@ -14,7 +14,7 @@ structured bilingual documentation project that scores **100/100** under `zenzic | **Zero absolute links** | Every internal link is relative (`../`, `./`) | | **Path symmetry** | A link `../assets/brand-kit.zip` resolves identically from `.md` and `.it.md` | | **Asset integrity** | All referenced assets exist on disk; no unreferenced files left dangling | -| **No placeholders** | Every page has real content, no TODO stubs | +| **No placeholders** | Every page has real content, no incomplete markers | ## Explore the structure diff --git a/examples/i18n-standard/docs/reference/api.md b/examples/i18n-standard/docs/reference/api.md index 4607e8e..bbe045a 100644 --- a/examples/i18n-standard/docs/reference/api.md +++ b/examples/i18n-standard/docs/reference/api.md @@ -5,7 +5,7 @@ This page documents the programmatic interface exposed by the i18n Standard example. 
It is intentionally simple — the goal is to demonstrate a valid reference page with -working cross-links, not to document a real API. +working cross-links, not to describe a real API. ## `check_links(docs_dir)` diff --git a/examples/i18n-standard/zenzic.toml b/examples/i18n-standard/zenzic.toml index 0dd066c..2af14b3 100644 --- a/examples/i18n-standard/zenzic.toml +++ b/examples/i18n-standard/zenzic.toml @@ -1,10 +1,18 @@ # SPDX-FileCopyrightText: 2026 PythonWoods # SPDX-License-Identifier: Apache-2.0 # zenzic.toml — Gold Standard configuration. -# Every check enabled, fail_under = 100. +# +# This example demonstrates a perfectly clean bilingual project. +# Target: zenzic check all --strict → SUCCESS, score 100/100. -docs_dir = "docs" -fail_under = 100 -# Paths relative to docs_dir. These files are generated at build time and -# must not exist on disk for Zenzic to give a green result — that is the point. +engine = "mkdocs" +docs_dir = "docs" +default_locale = "en" +locales = ["it"] +fail_under = 100 + +# manual.pdf and brand-kit.zip are referenced in the docs but generated at +# build time — they do not exist on disk. List them here so Zenzic validates +# the links structurally without requiring the files to be present. +# Paths are relative to docs_dir. excluded_build_artifacts = ["assets/manual.pdf", "assets/brand-kit.zip"] diff --git a/examples/security_lab/README.md b/examples/security_lab/README.md new file mode 100644 index 0000000..190f6e9 --- /dev/null +++ b/examples/security_lab/README.md @@ -0,0 +1,44 @@ + + + +# security_lab — Zenzic Shield Test Fixture + +This example intentionally triggers the Zenzic Shield (credential detection) and +the link checker (path traversal, absolute links). It is a pre-release regression +fixture for the Shield subsystem. 
+ +## What it demonstrates + +| File | Trigger | +| --- | --- | +| `traversal.md` | Path traversal: `../../etc/passwd` escapes `docs/` | +| `attack.md` | Path traversal + seven fake credential patterns (OpenAI, GitHub, AWS, Stripe, Slack, Google, PEM private key) | +| `absolute.md` | Absolute links (`/assets/logo.png`, `/etc/passwd`) | + +## Run it + +```bash +cd examples/security_lab + +# Link check only — exits 1 (path traversal in traversal.md + attack.md) +zenzic check links --strict + +# Reference check — exits 2 (Shield: fake credentials in attack.md) +zenzic check references + +# Full suite — exits 2 (Shield takes priority over other failures) +zenzic check all +``` + +> **Note:** Exit code `2` (Shield event) cannot be suppressed by `--exit-zero`. +> This is by design — credential exposure is a hard build blocker. + +## Credentials + +The credentials in `attack.md` are **entirely fake** — they match the regex shape +of real credentials but are not valid tokens for any service. They exist solely to +exercise the Shield scanner. Do not replace them with real credentials. + +## Engine + +Uses `engine = "mkdocs"`. No i18n configuration. diff --git a/examples/security_lab/docs/attack.md b/examples/security_lab/docs/attack.md index 43d2022..54f614e 100644 --- a/examples/security_lab/docs/attack.md +++ b/examples/security_lab/docs/attack.md @@ -1,14 +1,15 @@ + # Security Lab — Zenzic Shield Test This file intentionally contains patterns that trigger both layers of the **Zenzic Shield**. -It exists to verify Shield behaviour before releases. Run from the repo root: +It exists to verify Shield behaviour before releases. Run from this directory: ```bash -zenzic check links --strict # triggers PathTraversal -zenzic check references # triggers credential detection (Exit 2) +zenzic check links --strict # triggers PathTraversal (exit 1) +zenzic check references # triggers credential detection (exit 2) ``` --- @@ -24,7 +25,7 @@ access occurs. 
Expected output: ```text -[ERROR] security_lab/attack.md — PathTraversal: ../../../etc/passwd escapes docs root +[ERROR] attack.md — PathTraversal: ../../../etc/passwd escapes docs root ``` --- @@ -33,7 +34,16 @@ Expected output: The reference definitions below embed intentionally fake, credential-shaped patterns. They exist **solely to exercise the Shield scanner** during pre-release testing. -Do not substitute real credentials — the Shield will trigger Exit 2 immediately. +All values are synthetic — they match the regex shape but are not valid credentials +for any service. + +[openai-key]: sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +[github-token]: ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +[aws-key]: https://s3.example.com/AKIAIOSFODNN7EXAMPLEKEY/bucket +[stripe-key]: sk_live_XXXXXXXXXXXXXXXXXXXXXXXX +[slack-token]: xoxb-0000000000-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +[google-key]: https://maps.example.com/api?key=AIzaSYXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +[private-key]: -----BEGIN RSA PRIVATE KEY----- Expected output: @@ -43,11 +53,43 @@ Expected output: ║ Secret(s) detected in documentation ║ ╚══════════════════════════════════════╝ - [SHIELD] examples/security_lab/attack.md:21 — openai-api-key detected in URL - [SHIELD] examples/security_lab/attack.md:22 — github-token detected in URL - [SHIELD] examples/security_lab/attack.md:23 — aws-access-key detected in URL + [SHIELD] attack.md:35 — openai-api-key detected in reference + [SHIELD] attack.md:36 — github-token detected in reference + [SHIELD] attack.md:37 — aws-access-key detected in URL + [SHIELD] attack.md:38 — stripe-live-key detected in reference + [SHIELD] attack.md:39 — slack-token detected in reference + [SHIELD] attack.md:40 — google-api-key detected in URL + [SHIELD] attack.md:41 — private-key detected in reference Build aborted. Rotate the exposed credential immediately. 
``` Exit code: **2** + +--- + +## Patterns covered + +| Pattern | Example shape | What it catches | +| --- | --- | --- | +| `openai-api-key` | `sk-` + 48 alphanum | OpenAI API keys | +| `github-token` | `gh[pousr]_` + 36 alphanum | GitHub personal/OAuth tokens | +| `aws-access-key` | `AKIA` + 16 `[0-9A-Z]` | AWS IAM access key IDs | +| `stripe-live-key` | `sk_live_` + 24 alphanum | Stripe live secret keys | +| `slack-token` | `xox[baprs]-` + 10–48 alphanum | Slack bot/user/app tokens | +| `google-api-key` | `AIza` + 35 alphanum/`-_` | Google Cloud / Maps API keys | +| `private-key` | `-----BEGIN * PRIVATE KEY-----` | PEM private keys (RSA, EC, etc.) | + +--- + +## Why exit code 2 is non-suppressible + +Shield events use exit code `2` — distinct from `1` (check failures) — so CI pipelines can +treat credential exposure as a hard blocker independently of `--exit-zero`. + +```bash +zenzic check all --exit-zero # still exits 2 when the Shield fires +``` + +See [traversal.md](traversal.md) for the path traversal demo and +[absolute.md](absolute.md) for the portability enforcement demo. diff --git a/examples/security_lab/docs/fenced.md b/examples/security_lab/docs/fenced.md new file mode 100644 index 0000000..a02189e --- /dev/null +++ b/examples/security_lab/docs/fenced.md @@ -0,0 +1,52 @@ + + + +# Security Lab — Fenced Block Shield Test + +This file verifies that the Shield scans **every line of source**, including +lines inside fenced code blocks. It exists to prevent a regression where +credentials inside `bash` or unlabelled examples could silently bypass detection. + +Run from this directory: + +```bash +zenzic check references # triggers credential detection (exit 2) +``` + +--- + +## Layer 3 — Credentials Inside Fenced Blocks + +The Shield must fire on these blocks even though no build engine would ever +render a credential hidden inside a code example as a live secret. 
+ +### Unlabelled fence + +```text +export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLEKEY +``` + +### Bash fence + +```bash +export STRIPE_SECRET=sk_live_XXXXXXXXXXXXXXXXXXXXXXXX +``` + +Expected Shield output: + +```text +[SHIELD] fenced.md:20 — aws-access-key detected in line +[SHIELD] fenced.md:26 — stripe-live-key detected in line +``` + +Exit code: **2** + +--- + +## Why code blocks are not a safe hiding place + +A credential committed inside a bash example is still a committed credential. +It lives in git history, is indexed by code-search tools, and may be extracted +by automated scanners that do not respect Markdown formatting. + +The Shield deliberately ignores the fence boundary: **no line is invisible.** diff --git a/examples/security_lab/zenzic.toml b/examples/security_lab/zenzic.toml new file mode 100644 index 0000000..8db65d0 --- /dev/null +++ b/examples/security_lab/zenzic.toml @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 +# zenzic.toml — Security Lab configuration. +# +# This example intentionally triggers the Zenzic Shield (credential detection) +# and the link checker (path traversal, absolute links). +# +# Expected results: +# zenzic check links → EXIT 1 (path traversal in traversal.md + attack.md) +# zenzic check references → EXIT 2 (Shield: credentials in attack.md) +# zenzic check all → EXIT 2 (Shield takes priority) + +engine = "mkdocs" +docs_dir = "docs" diff --git a/examples/vanilla/README.md b/examples/vanilla/README.md new file mode 100644 index 0000000..df6c804 --- /dev/null +++ b/examples/vanilla/README.md @@ -0,0 +1,44 @@ + + + +# vanilla — Engine-Agnostic Docs Quality Gate + +This example demonstrates Zenzic running in **Vanilla mode**: no MkDocs, no +Zensical, no build engine of any kind. Just Markdown files and a `zenzic.toml`. 
+ +## What it demonstrates + +- `engine = "vanilla"` enables engine-agnostic mode +- Links, snippets, placeholders, assets, and custom rules are all checked +- The orphan check is skipped — with no declared nav, every file is reachable +- `fail_under = 80` enforces a minimum quality score +- A `[[custom_rules]]` rule (`ZZ-NOHTML`) warns against raw HTML in Markdown + +## Run it + +```bash +cd examples/vanilla +zenzic check all +``` + +Expected exit code: **0** — `SUCCESS: All checks passed.` + +## Who this is for + +Any team writing Markdown documentation without MkDocs or Zensical: + +- Hugo, Docusaurus, Sphinx, Astro, Jekyll +- GitHub wikis or plain Markdown repos +- Projects that have not yet chosen a build engine + +Zenzic checks the **source** — the build engine is irrelevant. + +## Structure + +```text +docs/ + index.md ← home page + guides/ + setup.md ← install and configure + api.md ← programmatic interface reference +``` diff --git a/examples/vanilla/docs/guides/api.md b/examples/vanilla/docs/guides/api.md new file mode 100644 index 0000000..f46ec1f --- /dev/null +++ b/examples/vanilla/docs/guides/api.md @@ -0,0 +1,45 @@ + + + +# Python API Reference + +Zenzic exposes a programmatic interface for integration into build scripts, +pre-commit hooks, and CI pipelines that cannot use the CLI directly. 
+ +## Single-file scanning + +```python +from pathlib import Path +from zenzic.core.scanner import ReferenceScanner + +scanner = ReferenceScanner(file_path=Path("docs/index.md")) +report = scanner.scan() + +for finding in report.rule_findings: + print(f"{finding.file_path}:{finding.line_no} — {finding.message}") +``` + +## Full documentation tree + +```python +from pathlib import Path +from zenzic.core.scanner import scan_docs_references_with_links + +reports = scan_docs_references_with_links(docs_root=Path("docs")) +for report in reports: + if report.has_errors(): + print(f"Errors in {report.file_path}") +``` + +## Exit code conventions + +| Code | Meaning | +| --- | --- | +| `0` | All checks passed | +| `1` | One or more check failures (broken links, orphans, etc.) | +| `2` | Shield event — credential detected (non-suppressible) | + +## Related + +- [Setup guide](setup.md) +- [Home](../index.md) diff --git a/examples/vanilla/docs/guides/setup.md b/examples/vanilla/docs/guides/setup.md new file mode 100644 index 0000000..63cdea8 --- /dev/null +++ b/examples/vanilla/docs/guides/setup.md @@ -0,0 +1,51 @@ + + + +# Setup Guide + +This guide covers installing and configuring the project in Vanilla mode. + +## Prerequisites + +- Python 3.11 or later +- [uv](https://docs.astral.sh/uv/) (recommended) or pip + +## Installation + +```bash +# Zero-install: run directly from PyPI +uvx zenzic check all + +# Or install as a project dev dependency +uv add --dev zenzic +``` + +## Minimal configuration + +Create a `zenzic.toml` at the repository root: + +```toml +engine = "vanilla" +docs_dir = "docs" +``` + +That is the entire required configuration. Run `zenzic check all` — Zenzic will +validate links, snippets, placeholders, and assets across every `.md` file under +`docs/`. 
+ +## Adding custom rules + +Extend enforcement without writing Python: + +```toml +[[custom_rules]] +id = "ZZ-NOFIXME" +pattern = "(?i)\\bFIXME\\b" +message = "FIXME markers must be resolved before publishing." +severity = "error" +``` + +## Next steps + +- [API reference](api.md) — programmatic usage +- [Home](../index.md) diff --git a/examples/vanilla/docs/index.md b/examples/vanilla/docs/index.md new file mode 100644 index 0000000..34c8b32 --- /dev/null +++ b/examples/vanilla/docs/index.md @@ -0,0 +1,39 @@ + + + +# Vanilla Example + +This project has no build engine. There is no `mkdocs.yml`, no `zensical.toml`, +no Hugo config, no Docusaurus config. Just Markdown files and a `zenzic.toml`. + +Zenzic runs in **Vanilla mode**: links, snippets, placeholders, assets, and custom +rules are all checked. The orphan check is skipped — with no declared navigation, +every file is reachable by definition. + +## Run it + +```bash +cd examples/vanilla +zenzic check all +``` + +Expected result: `SUCCESS: All checks passed.` + +## What this example demonstrates + +- `engine = "vanilla"` in `zenzic.toml` enables engine-agnostic mode +- `fail_under = 80` enforces a minimum quality score +- A `[[custom_rules]]` rule warns against raw HTML in Markdown +- Internal links between pages are validated without any build tool +- Python snippets are syntax-checked without executing them + +## Explore the docs + +- [Setup guide](guides/setup.md) — install and configure this project +- [Python API](guides/api.md) — programmatic interface reference + +## Who should use Vanilla mode + +Any team that writes Markdown documentation but does not use MkDocs or Zensical: +Hugo, Docusaurus, Sphinx, Astro, Jekyll, plain GitHub wikis, or no build tool at all. +Zenzic checks the source — the build engine is irrelevant. 
diff --git a/examples/vanilla/zenzic.toml b/examples/vanilla/zenzic.toml new file mode 100644 index 0000000..3920b01 --- /dev/null +++ b/examples/vanilla/zenzic.toml @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 +# zenzic.toml — Vanilla (engine-agnostic) example. +# +# No mkdocs.yml, no zensical.toml, no build engine of any kind. +# Zenzic runs in Vanilla mode: links, snippets, placeholders, and assets +# are all checked. The orphan check is skipped (no nav to compare against). +# +# Use this mode for Hugo, Docusaurus, Sphinx, Astro, hand-written HTML, +# or any project where you want documentation quality gates without +# committing to a specific build toolchain. +# +# Target: zenzic check all → SUCCESS + +engine = "vanilla" +docs_dir = "docs" + +# Enforce a minimum quality floor. +fail_under = 80 + +# Project-specific lint rule: no inline HTML in docs. +[[custom_rules]] +id = "ZZ-NOHTML" +pattern = "<(?!--)[a-zA-Z]" +message = "Avoid raw HTML in Markdown — use native Markdown syntax instead." 
+severity = "warning" diff --git a/mkdocs.yml b/mkdocs.yml index 1a495f2..3b2a33f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,8 @@ plugins: Overview: Panoramica User Guide: Guida Utente Getting Started: Primi Passi + CLI Commands: Comandi CLI + Advanced Features: Funzionalità Avanzate Available Checks: Controlli Disponibili Configuration: Configurazione Core Settings: Impostazioni di Base @@ -95,6 +97,7 @@ plugins: Architecture: Architettura API Reference: Riferimento API Writing an Adapter: Scrivere un Adapter + Example Projects: Progetti di Esempio About: Informazioni Philosophy: Filosofia License: Licenza @@ -138,7 +141,7 @@ extra_css: extra: build_date: !ENV [BUILD_DATE, "dev"] generator: false - project_version: "0.4.0rc2" + project_version: "0.4.0rc3" social: - icon: fontawesome/brands/github link: https://github.com/PythonWoods/zenzic @@ -160,9 +163,11 @@ extra: nav: - Home: index.md - Docs: - - Overview: docs/index.md + - Overview: guide/index.md - User Guide: - Getting Started: usage/index.md + - CLI Commands: usage/commands.md + - Advanced Features: usage/advanced.md - Available Checks: checks.md - Configuration: - Overview: configuration/index.md @@ -179,6 +184,7 @@ nav: - Architecture: architecture.md - API Reference: reference/api.md - Writing an Adapter: developers/writing-an-adapter.md + - Example Projects: developers/examples.md - Community: - Get Involved: community/index.md - How to Contribute: community/contribute/index.md diff --git a/noxfile.py b/noxfile.py index 8b99c77..8671f2f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -17,13 +17,19 @@ nox.options.sessions = ["lint", "format", "typecheck"] PYTHONS = ["3.11", "3.12", "3.13"] -_DEV = ("uv", "sync", "--active", "--group", "dev") + +# Per-group sync tuples — each session installs only what it needs. 
+_SYNC_TEST = ("uv", "sync", "--active", "--group", "test") +_SYNC_LINT = ("uv", "sync", "--active", "--group", "lint") +_SYNC_DOCS = ("uv", "sync", "--active", "--group", "docs") +_SYNC_RELEASE = ("uv", "sync", "--active", "--group", "release") +_SYNC_DEV = ("uv", "sync", "--active", "--group", "dev") @nox.session(python=PYTHONS) def tests(session: nox.Session) -> None: """Run the test suite with branch coverage across all supported Python versions.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_TEST, external=True) session.run( "pytest", "--cov=src/zenzic", @@ -39,35 +45,35 @@ def lint(session: nox.Session) -> None: Read-only by default (used in CI). To auto-fix: nox -s lint -- --fix """ - session.run(*_DEV, external=True) + session.run(*_SYNC_LINT, external=True) session.run("ruff", "check", *session.posargs, "src/", "tests/") @nox.session(python="3.11") def format(session: nox.Session) -> None: # noqa: A001 """Check code formatting with ruff (read-only, used in CI).""" - session.run(*_DEV, external=True) + session.run(*_SYNC_LINT, external=True) session.run("ruff", "format", "--check", "src/", "tests/") @nox.session(python="3.11") def fmt(session: nox.Session) -> None: """Auto-format code with ruff in place (use during development).""" - session.run(*_DEV, external=True) + session.run(*_SYNC_LINT, external=True) session.run("ruff", "format", "src/", "tests/") @nox.session(python="3.11") def typecheck(session: nox.Session) -> None: """Run static type checking with mypy.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_LINT, external=True) session.run("mypy", "src/") @nox.session(python="3.11") def reuse(session: nox.Session) -> None: """Verify REUSE/SPDX license compliance.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_LINT, external=True) session.run("reuse", "lint") @@ -134,7 +140,7 @@ def _download_lucide_icons() -> None: @nox.session(python="3.11") def docs(session: nox.Session) -> None: """Build documentation with 
mkdocs in strict mode.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_DOCS, external=True) _download_lucide_icons() _build_brand_kit_zip() session.run("mkdocs", "build", "--strict") @@ -146,7 +152,7 @@ def docs_serve(session: nox.Session) -> None: Pass a custom bind address via posargs: nox -s docs_serve -- -a 127.0.0.1:8001 """ - session.run(*_DEV, external=True) + session.run(*_SYNC_DOCS, external=True) _download_lucide_icons() session.run("mkdocs", "serve", *session.posargs) @@ -165,7 +171,7 @@ def _build_brand_kit_zip() -> None: @nox.session(python="3.11") def preflight(session: nox.Session) -> None: """Run all quality checks — equivalent to a full CI pipeline.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_DEV, external=True) session.run("ruff", "check", "src/", "tests/") session.run("ruff", "format", "--check", "src/", "tests/") session.run("mypy", "src/") @@ -185,7 +191,7 @@ def preflight(session: nox.Session) -> None: @nox.session(python="3.11") def screenshot(session: nox.Session) -> None: """Regenerate docs/assets/screenshot.svg from examples/broken-docs output.""" - session.run(*_DEV, external=True) + session.run(*_SYNC_DEV, external=True) session.run("python", "scripts/generate_screenshot.py") diff --git a/pyproject.toml b/pyproject.toml index c1fc37f..dae39f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,15 +7,15 @@ build-backend = "hatchling.build" [project] name = "zenzic" -version = "0.4.0rc2" -description = "Engineering-grade linter and strict build wrapper for MkDocs documentation sites" +version = "0.4.0rc3" +description = "Engineering-grade, engine-agnostic linter and security shield for Markdown documentation" readme = "README.md" requires-python = ">=3.11" # PEP 639: SPDX expression string (replaces deprecated `license = { text = "..." 
}`) license = "Apache-2.0" license-files = ["LICENSE", "LICENSES/*.txt"] authors = [{ name = "PythonWoods", email = "dev@pythonwoods.dev" }] -keywords = ["documentation", "mkdocs", "zensical", "linter", "ci", "strict", "links"] +keywords = ["documentation", "mkdocs", "zensical", "linter", "ci", "strict", "links", "security", "yaml", "json", "toml"] classifiers = [ "Development Status :: 3 - Alpha", "Environment :: Console", @@ -37,6 +37,13 @@ dependencies = [ "httpx>=0.27", ] +[project.optional-dependencies] +docs = [ + "mkdocs-material[imaging]>=9.0.0", + "mkdocs-minify-plugin>=0.7.0", + "mkdocstrings[python]>=0.24.0", + "mkdocs-static-i18n>=1.3.1", +] [project.scripts] zenzic = "zenzic.main:cli_main" @@ -63,23 +70,35 @@ packages = ["src/zenzic"] # ─── Development dependencies (PEP 735 — managed by uv) ────────────────────── [dependency-groups] -dev = [ +test = [ "pytest>=8.0.0", "pytest-cov>=4.1.0", +] +lint = [ "ruff>=0.3.0", "mypy>=1.5.0", "pre-commit>=3.0.0", - "nox>=2024.4.15", "reuse>=5.0.2", - "pip-audit>=2.7.0", - "bump-my-version>=1.2.6", + "types-pyyaml>=6.0.12.20250915", +] +docs = [ "mkdocs-material[imaging]>=9.0.0", "mkdocs-minify-plugin>=0.7.0", "mkdocstrings[python]>=0.24.0", "mkdocs-static-i18n>=1.3.1", - "types-pyyaml>=6.0.12.20250915", "requests>=2.33.0", # CVE-2026-25645 — transitive via mkdocs-material/pip-audit ] +release = [ + "nox>=2024.4.15", + "bump-my-version>=1.2.6", + "pip-audit>=2.7.0", +] +dev = [ + {include-group = "test"}, + {include-group = "lint"}, + {include-group = "docs"}, + {include-group = "release"}, +] # ─── Ruff ───────────────────────────────────────────────────────────────────── @@ -145,7 +164,7 @@ exclude_lines = [ # ─── Version bumping ─────────────────────────────────────────────────────────── [tool.bumpversion] -current_version = "0.4.0rc2" +current_version = "0.4.0rc3" commit = true tag = true tag_name = "v{new_version}" @@ -165,3 +184,57 @@ replace = '__version__ = "{new_version}"' filename = "mkdocs.yml" search = 
'project_version: "{current_version}"' replace = 'project_version: "{new_version}"' + +[[tool.bumpversion.files]] +filename = "CITATION.cff" +search = "version: {current_version}" +replace = "version: {new_version}" + +[[tool.bumpversion.files]] +filename = "CITATION.cff" +search = "date-released: 2026-03-29" +replace = "date-released: {now:%Y-%m-%d}" + +[[tool.bumpversion.files]] +# CHANGELOG uses the hyphenated pre-release spelling: 0.4.0-rc3 (hyphen before pre-release label). +# The serialize pattern below produces that form; pyproject uses the PEP 440 normalized 0.4.0rc3 (no hyphen). +filename = "CHANGELOG.md" +search = "[{current_version}]" +replace = "[{new_version}]" +serialize = ["{major}.{minor}.{patch}-{pre_l}{pre_n}", "{major}.{minor}.{patch}"] + +[[tool.bumpversion.files]] +filename = "CHANGELOG.it.md" +search = "[{current_version}]" +replace = "[{new_version}]" +serialize = ["{major}.{minor}.{patch}-{pre_l}{pre_n}", "{major}.{minor}.{patch}"] + +[[tool.bumpversion.files]] +filename = "RELEASE.md" +search = "**Release date:** 2026-03-29" +replace = "**Release date:** {now:%Y-%m-%d}" + +[[tool.bumpversion.files]] +filename = "RELEASE.it.md" +search = "**Data di rilascio:** 2026-03-29" +replace = "**Data di rilascio:** {now:%Y-%m-%d}" + +[[tool.bumpversion.files]] +filename = "docs/community/index.md" +search = "version = {{{current_version}}}," +replace = "version = {{{new_version}}}," + +[[tool.bumpversion.files]] +filename = "docs/community/index.md" +search = "date = {{2026-03-29}}," +replace = "date = {{{now:%Y-%m-%d}}}," + +[[tool.bumpversion.files]] +filename = "docs/it/community/index.md" +search = "version = {{{current_version}}}," +replace = "version = {{{new_version}}}," + +[[tool.bumpversion.files]] +filename = "docs/it/community/index.md" +search = "date = {{2026-03-29}}," +replace = "date = {{{now:%Y-%m-%d}}}," diff --git a/src/zenzic/__init__.py b/src/zenzic/__init__.py index 170cc05..541997a 100644 --- a/src/zenzic/__init__.py +++ b/src/zenzic/__init__.py @@ -2,4 +2,4 @@ #
SPDX-License-Identifier: Apache-2.0 """Zenzic — engineering-grade documentation linter for MkDocs sites.""" -__version__ = "0.4.0rc2" +__version__ = "0.4.0rc3" diff --git a/src/zenzic/cli.py b/src/zenzic/cli.py index 15c82b9..eb7bcda 100644 --- a/src/zenzic/cli.py +++ b/src/zenzic/cli.py @@ -224,7 +224,15 @@ def check_references( all_errors.append(msg) for rf in report.rule_findings: - msg = f" [yellow]{rel}:{rf.line_no}[/] [{rf.rule_id}] — {rf.message}" + severity_color = "red" if rf.is_error else "yellow" + header = ( + f"[{severity_color}][{rf.rule_id}][/] [dim]{rel}:{rf.line_no}[/] — {rf.message}" + ) + if rf.matched_line: + snippet = rf.matched_line.rstrip() + msg = f"{header}\n [dim]│[/] [italic]{snippet}[/]" + else: + msg = header if rf.is_error: all_errors.append(msg) else: @@ -404,10 +412,12 @@ def _collect_all_results( @check_app.command(name="all") def check_all( - strict: bool = typer.Option(False, "--strict", "-s", help="Treat warnings as errors."), + strict: bool | None = typer.Option( + None, "--strict", "-s", help="Treat warnings as errors and validate external URLs." + ), output_format: str = typer.Option("text", "--format", help="Output format: text or json."), - exit_zero: bool = typer.Option( - False, "--exit-zero", help="Always exit 0; report issues without failing." + exit_zero: bool | None = typer.Option( + None, "--exit-zero", help="Always exit 0; report issues without failing." 
), engine: str | None = typer.Option( None, @@ -423,7 +433,9 @@ def check_all( if not loaded_from_file: _print_no_config_hint() config = _apply_engine_override(config, engine) - results = _collect_all_results(repo_root, config, strict=strict) + effective_strict = strict if strict is not None else config.strict + effective_exit_zero = exit_zero if exit_zero is not None else config.exit_zero + results = _collect_all_results(repo_root, config, strict=effective_strict) if output_format == "json": report = { @@ -442,7 +454,7 @@ def check_all( "references": results.reference_errors, } print(json.dumps(report, indent=2)) - if results.failed and not exit_zero: + if results.failed and not effective_exit_zero: raise typer.Exit(1) return @@ -517,7 +529,7 @@ def check_all( if results.failed: console.print("\n[red]FAILED:[/] One or more checks failed.") - if not exit_zero: + if not effective_exit_zero: raise typer.Exit(1) else: console.print("\n[green]SUCCESS:[/] All checks passed.") @@ -755,7 +767,9 @@ def _run_all_checks( def score( - strict: bool = typer.Option(False, "--strict", "-s", help="Run link check in strict mode."), + strict: bool | None = typer.Option( + None, "--strict", "-s", help="Run link check in strict mode." + ), output_format: str = typer.Option("text", "--format", help="Output format: text or json."), save: bool = typer.Option(False, "--save", help="Save score snapshot to .zenzic-score.json."), fail_under: int = typer.Option( @@ -765,7 +779,8 @@ def score( """Compute a 0–100 documentation quality score across all checks.""" repo_root = find_repo_root() config, _ = ZenzicConfig.load(repo_root) - report = _run_all_checks(repo_root, config, strict=strict) + effective_strict = strict if strict is not None else config.strict + report = _run_all_checks(repo_root, config, strict=effective_strict) # CLI flag takes precedence; fall back to zenzic.toml; 0 means disabled. 
effective_threshold = fail_under if fail_under > 0 else config.fail_under @@ -795,7 +810,9 @@ def score( def diff( - strict: bool = typer.Option(False, "--strict", "-s", help="Run link check in strict mode."), + strict: bool | None = typer.Option( + None, "--strict", "-s", help="Run link check in strict mode." + ), output_format: str = typer.Option("text", "--format", help="Output format: text or json."), threshold: int = typer.Option( 0, @@ -810,6 +827,7 @@ def diff( """ repo_root = find_repo_root() config, _ = ZenzicConfig.load(repo_root) + effective_strict = strict if strict is not None else config.strict baseline = load_snapshot(repo_root) if baseline is None: @@ -819,7 +837,7 @@ def diff( ) raise typer.Exit(1) - current = _run_all_checks(repo_root, config, strict=strict) + current = _run_all_checks(repo_root, config, strict=effective_strict) delta = current.score - baseline.score if output_format == "json": diff --git a/src/zenzic/core/adapters/_base.py b/src/zenzic/core/adapters/_base.py index 9e58a3a..75396f3 100644 --- a/src/zenzic/core/adapters/_base.py +++ b/src/zenzic/core/adapters/_base.py @@ -24,6 +24,36 @@ def resolve_asset(self, missing_abs: Path, docs_root: Path) -> Path | None: """Return the default-locale fallback for a missing asset, or ``None``.""" ... + def resolve_anchor( + self, + resolved_file: Path, + anchor: str, + anchors_cache: dict[Path, set[str]], + docs_root: Path, + ) -> bool: + """Return ``True`` if an anchor miss should be suppressed via i18n fallback. + + When a file inside a locale sub-tree (e.g. ``docs/it/architecture.md``) + does not contain the requested anchor — because headings are translated — + this method checks whether the anchor exists in the corresponding + default-locale file (e.g. ``docs/architecture.md``). If it does, the + ``AnchorMissing`` error is suppressed: MkDocs / Zensical will serve the + default-locale page for this anchor at build time. 
+ + Args: + resolved_file: Absolute path of the locale file that was found but + whose anchor set does not contain *anchor*. + anchor: The fragment identifier that was not found (without ``#``). + anchors_cache: Pre-built mapping of absolute ``Path`` → anchor slug + set. No disk I/O is performed — this is a pure in-memory check. + docs_root: Resolved absolute ``docs/`` root (for path stripping). + + Returns: + ``True`` if the anchor exists in the default-locale equivalent file + and the error should be suppressed; ``False`` otherwise. + """ + ... + def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: """Return ``True`` when *rel* is a locale-mirror of a nav-listed page.""" ... diff --git a/src/zenzic/core/adapters/_mkdocs.py b/src/zenzic/core/adapters/_mkdocs.py index 606f90b..43bb56c 100644 --- a/src/zenzic/core/adapters/_mkdocs.py +++ b/src/zenzic/core/adapters/_mkdocs.py @@ -15,6 +15,7 @@ import yaml +from zenzic.core.adapters._utils import remap_to_default_locale from zenzic.core.exceptions import ConfigurationError from zenzic.models.config import BuildContext @@ -290,14 +291,45 @@ def resolve_asset(self, missing_abs: Path, docs_root: Path) -> Path | None: """ if not self._fallback_to_default: return None - try: - rel = missing_abs.relative_to(docs_root) - except ValueError: - return None - if not rel.parts or rel.parts[0] not in self._locale_dirs: - return None - fallback = docs_root.joinpath(*rel.parts[1:]) - return fallback if fallback.exists() else None + fallback = remap_to_default_locale(missing_abs, docs_root, self._locale_dirs) + return fallback if fallback is not None and fallback.exists() else None + + def resolve_anchor( + self, + resolved_file: Path, + anchor: str, + anchors_cache: dict[Path, set[str]], + docs_root: Path, + ) -> bool: + """Return ``True`` if an anchor miss should be suppressed via i18n fallback. + + When a locale file (e.g. 
``docs/it/architecture.md``) exists but does + not contain the requested anchor — because its headings are translated — + this method checks whether the anchor exists in the default-locale + equivalent (e.g. ``docs/architecture.md``). If it does, MkDocs will + serve the default-locale page for this anchor, so the error is spurious. + + The check is pure: only ``anchors_cache`` (already in memory) is + consulted. No disk I/O occurs. + + Args: + resolved_file: Absolute path of the locale file that was found but + whose anchor set does not contain *anchor*. + anchor: The fragment identifier that was not found (without ``#``). + anchors_cache: Pre-built mapping of absolute ``Path`` → anchor slug + set (same mapping used by :class:`~zenzic.core.resolver.InMemoryPathResolver`). + docs_root: Resolved absolute ``docs/`` root (for path stripping). + + Returns: + ``True`` if the anchor exists in the default-locale equivalent file; + ``False`` otherwise. + """ + if not self._fallback_to_default: + return False + default_file = remap_to_default_locale(resolved_file, docs_root, self._locale_dirs) + if default_file is None: + return False + return anchor.lower() in anchors_cache.get(default_file, set()) def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: """Return ``True`` when *rel* is a locale-mirror of a nav-listed page. @@ -313,10 +345,12 @@ def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: Returns: ``True`` when the file is a shadow of a nav-listed page. 
""" - if not rel.parts or rel.parts[0] not in self._locale_dirs: + default_abs = remap_to_default_locale( + self._docs_root / rel, self._docs_root, self._locale_dirs + ) + if default_abs is None: return False - default_rel = Path(*rel.parts[1:]).as_posix() - return default_rel in nav_paths + return default_abs.relative_to(self._docs_root).as_posix() in nav_paths def get_ignored_patterns(self) -> set[str]: """Return filename glob patterns for non-default locale files (suffix mode).""" diff --git a/src/zenzic/core/adapters/_utils.py b/src/zenzic/core/adapters/_utils.py new file mode 100644 index 0000000..776bb4c --- /dev/null +++ b/src/zenzic/core/adapters/_utils.py @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: 2026 PythonWoods +# SPDX-License-Identifier: Apache-2.0 +"""Shared i18n path-remapping utility for Zenzic adapters. + +All adapters that support ``fallback_to_default`` folder-mode i18n share the +same "locale prefix stripping" logic: given a path inside a locale sub-tree +(e.g. ``docs/it/architecture.md``), produce the corresponding default-locale +path (``docs/architecture.md``). + +This module centralises that logic so each adapter only needs to know *which* +directories are locale directories, and Zenzic owns *how* the remapping works. + +Third-party adapters that use a different i18n convention (e.g. Hugo's +``content//`` layout) are free to ignore this utility entirely and +implement their own ``resolve_asset`` / ``resolve_anchor`` logic. +""" + +from __future__ import annotations + +from pathlib import Path + + +def remap_to_default_locale( + abs_path: Path, + docs_root: Path, + locale_dirs: frozenset[str], +) -> Path | None: + """Return the default-locale equivalent of a path inside a locale sub-tree. + + Strips the first path component when it is a known locale directory name, + producing the canonical default-locale path. 
Returns ``None`` when the + path is not inside any known locale directory — the caller should not apply + fallback logic in that case. + + This function is **pure** — no I/O, no disk access. The caller decides + what to do with the returned path (existence check, anchor lookup, etc.). + + Examples:: + + remap_to_default_locale( + Path("/docs/it/architecture.md"), + Path("/docs"), + frozenset({"it", "fr"}), + ) + # → Path("/docs/architecture.md") + + remap_to_default_locale( + Path("/docs/architecture.md"), + Path("/docs"), + frozenset({"it", "fr"}), + ) + # → None (not inside a locale directory) + + Args: + abs_path: Absolute path to remap. May be a ``.md`` file, an asset, + or any path inside ``docs_root``. + docs_root: Resolved absolute ``docs/`` root. + locale_dirs: Frozenset of non-default locale directory names + (e.g. ``frozenset({"it", "fr"})``). + + Returns: + Absolute :class:`~pathlib.Path` of the default-locale equivalent, or + ``None`` when *abs_path* is not inside a recognised locale directory. 
+ """ + try: + rel = abs_path.relative_to(docs_root) + except ValueError: + return None + if not rel.parts or rel.parts[0] not in locale_dirs: + return None + return docs_root.joinpath(*rel.parts[1:]) diff --git a/src/zenzic/core/adapters/_vanilla.py b/src/zenzic/core/adapters/_vanilla.py index b886d67..10f4b10 100644 --- a/src/zenzic/core/adapters/_vanilla.py +++ b/src/zenzic/core/adapters/_vanilla.py @@ -26,6 +26,16 @@ def resolve_asset(self, missing_abs: Path, docs_root: Path) -> Path | None: # n """Always ``None`` — no fallback logic without a locale tree.""" return None + def resolve_anchor( # noqa: ARG002 + self, + resolved_file: Path, + anchor: str, + anchors_cache: dict[Path, set[str]], + docs_root: Path, + ) -> bool: + """Always ``False`` — no i18n anchor fallback without a locale tree.""" + return False + def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: # noqa: ARG002 """Always ``False`` — no shadow pages without a nav.""" return False diff --git a/src/zenzic/core/adapters/_zensical.py b/src/zenzic/core/adapters/_zensical.py index 41572f6..650b3b9 100644 --- a/src/zenzic/core/adapters/_zensical.py +++ b/src/zenzic/core/adapters/_zensical.py @@ -11,6 +11,7 @@ from pathlib import Path from typing import Any +from zenzic.core.adapters._utils import remap_to_default_locale from zenzic.core.exceptions import ConfigurationError from zenzic.models.config import BuildContext @@ -91,21 +92,45 @@ def resolve_asset(self, missing_abs: Path, docs_root: Path) -> Path | None: """Return the default-locale fallback for a missing asset, or ``None``.""" if not self._fallback_to_default: return None - try: - rel = missing_abs.relative_to(docs_root) - except ValueError: - return None - if not rel.parts or rel.parts[0] not in self._locale_dirs: - return None - fallback = docs_root.joinpath(*rel.parts[1:]) - return fallback if fallback.exists() else None + fallback = remap_to_default_locale(missing_abs, docs_root, self._locale_dirs) + return 
fallback if fallback is not None and fallback.exists() else None + + def resolve_anchor( + self, + resolved_file: Path, + anchor: str, + anchors_cache: dict[Path, set[str]], + docs_root: Path, + ) -> bool: + """Return ``True`` if an anchor miss should be suppressed via i18n fallback. + + Locale configuration is sourced from ``BuildContext`` (``zenzic.toml``). + + Args: + resolved_file: Absolute path of the locale file whose anchor set + does not contain *anchor*. + anchor: Fragment identifier that was not found (without ``#``). + anchors_cache: Pre-built ``Path`` → anchor slug set mapping. + docs_root: Resolved absolute ``docs/`` root. + + Returns: + ``True`` if the anchor exists in the default-locale equivalent file. + """ + if not self._fallback_to_default: + return False + default_file = remap_to_default_locale(resolved_file, docs_root, self._locale_dirs) + if default_file is None: + return False + return anchor.lower() in anchors_cache.get(default_file, set()) def is_shadow_of_nav_page(self, rel: Path, nav_paths: frozenset[str]) -> bool: """Return ``True`` when *rel* is a locale-mirror of a nav-listed page.""" - if not rel.parts or rel.parts[0] not in self._locale_dirs: + default_abs = remap_to_default_locale( + self._docs_root / rel, self._docs_root, self._locale_dirs + ) + if default_abs is None: return False - default_rel = Path(*rel.parts[1:]).as_posix() - return default_rel in nav_paths + return default_abs.relative_to(self._docs_root).as_posix() in nav_paths def get_ignored_patterns(self) -> set[str]: """Empty set — Zensical does not use MkDocs suffix-mode i18n patterns.""" diff --git a/src/zenzic/core/rules.py b/src/zenzic/core/rules.py index 8344b10..ee18614 100644 --- a/src/zenzic/core/rules.py +++ b/src/zenzic/core/rules.py @@ -53,6 +53,9 @@ class RuleFinding: rule_id: Identifier of the rule that produced this finding. message: Human-readable description of the issue. severity: ``"error"``, ``"warning"``, or ``"info"``. 
+ matched_line: Raw text of the offending line. Populated by + :class:`CustomRule`; empty string for Python-native rules + that do not provide line context. """ file_path: Path @@ -60,6 +63,7 @@ class RuleFinding: rule_id: str message: str severity: Severity = "error" + matched_line: str = field(default="") @property def is_error(self) -> bool: @@ -177,6 +181,7 @@ def check(self, file_path: Path, text: str) -> list[RuleFinding]: rule_id=self.id, message=self.message, severity=self.severity, + matched_line=line, ) ) return findings diff --git a/src/zenzic/core/scanner.py b/src/zenzic/core/scanner.py index 993e873..9dbdbc3 100644 --- a/src/zenzic/core/scanner.py +++ b/src/zenzic/core/scanner.py @@ -337,6 +337,42 @@ def find_unused_assets(repo_root: Path, config: ZenzicConfig | None = None) -> l HarvestEvent = tuple[int, str, Any] +def _skip_frontmatter( + fh: Any, +) -> Generator[tuple[int, str], None, None]: + """Yield ``(lineno, line)`` pairs from an open file handle, skipping YAML frontmatter. + + Frontmatter is a leading ``---`` block that ends with ``---`` or ``...``. + Every other line — including lines inside fenced code blocks — is yielded. + This is the raw stream used by the Shield so that secrets embedded inside + code examples are never invisible. + + Args: + fh: An open text file handle positioned at the start of the file. + + Yields: + ``(1-based line number, raw line string)`` for every non-frontmatter line. 
+ """ + in_frontmatter = False + frontmatter_checked = False + + for lineno, line in enumerate(fh, start=1): + stripped = line.strip() + + if not frontmatter_checked: + frontmatter_checked = True + if stripped == "---": + in_frontmatter = True + continue + + if in_frontmatter: + if stripped in ("---", "..."): + in_frontmatter = False + continue + + yield lineno, line + + def _iter_content_lines( file_path: Path, ) -> Generator[tuple[int, str], None, None]: @@ -352,6 +388,9 @@ def _iter_content_lines( * **Fenced code blocks**: Lines inside ``` or ~~~ fences are skipped so that example URLs inside code never trigger false positives. + Use :func:`_skip_frontmatter` when the Shield needs to scan every line, + including lines inside fenced blocks. + Args: file_path: Path to the Markdown source file. @@ -359,25 +398,11 @@ def _iter_content_lines( ``(1-based line number, raw line string)`` for every content line. """ in_block = False - in_frontmatter = False - frontmatter_checked = False with file_path.open(encoding="utf-8") as fh: - for lineno, line in enumerate(fh, start=1): + for lineno, line in _skip_frontmatter(fh): stripped = line.strip() - # ── YAML frontmatter skip (first line only) ─────────────────── - if not frontmatter_checked: - frontmatter_checked = True - if stripped == "---": - in_frontmatter = True - continue - - if in_frontmatter: - if stripped in ("---", "..."): - in_frontmatter = False - continue - # ── Fenced code block skip ──────────────────────────────────── if not in_block: if stripped.startswith("```") or stripped.startswith("~~~"): @@ -484,12 +509,38 @@ def harvest(self) -> Generator[HarvestEvent, None, None]: findings are yielded immediately as ``("SECRET", SecurityFinding)`` events so callers can abort with Exit Code 2 before Pass 2 begins. + Uses two independent line streams from the same file: + + * **Shield stream** — every line except YAML frontmatter, including lines + inside fenced code blocks. 
Ensures that credentials in ``bash`` or + unlabelled code examples are never invisible to the Shield. + * **Content stream** — lines outside fenced blocks (``_iter_content_lines``). + Used for reference-definition harvesting and alt-text detection so that + example URLs inside code blocks never produce false positives. + + Reference definitions (``[id]: url``) are always outside fenced blocks by + CommonMark §4.7 convention, so scanning them on the content stream is + sufficient. The Shield additionally scans every definition URL via + ``scan_url_for_secrets`` to catch embedded secrets in reference URLs. + Yields: ``(lineno, event_type, data)`` tuples. See module-level type alias ``HarvestEvent`` for the full list of event types and data shapes. """ + # ── 1.a Shield pass: scan every line (fences are NOT skipped) ──────── + # Collect SECRET events keyed by line number so duplicate suppression + # (a definition URL that also matches scan_line_for_secrets) still works. + secret_line_nos: set[int] = set() + shield_events: list[HarvestEvent] = [] + with self.file_path.open(encoding="utf-8") as fh: + for lineno, line in _skip_frontmatter(fh): + for finding in scan_line_for_secrets(line, self.file_path, lineno): + shield_events.append((lineno, "SECRET", finding)) + secret_line_nos.add(lineno) + + # ── 1.b Content pass: harvest ref-defs and alt-text (fences skipped) ─ + content_events: list[HarvestEvent] = [] for lineno, line in _iter_content_lines(self.file_path): - # ── 1.b Reference definition detection ─────────────────────────── def_match = _RE_REF_DEF.match(line) if def_match: raw_id, url = def_match.group(1), def_match.group(2) @@ -497,29 +548,30 @@ def harvest(self) -> Generator[HarvestEvent, None, None]: norm_id = raw_id.lower().strip() if accepted: - yield (lineno, "DEF", (norm_id, url)) + content_events.append((lineno, "DEF", (norm_id, url))) # ── 1.c Shield: scan URL for secrets ───────────────────── for finding in scan_url_for_secrets(url, self.file_path, 
lineno): - yield (lineno, "SECRET", finding) + # Only emit if scan_line_for_secrets hasn't already + # emitted a SECRET for this line (avoid duplicates). + if lineno not in secret_line_nos: + shield_events.append((lineno, "SECRET", finding)) + secret_line_nos.add(lineno) else: - yield (lineno, "DUPLICATE_DEF", (norm_id, url)) + content_events.append((lineno, "DUPLICATE_DEF", (norm_id, url))) continue - # ── 1.d Shield: scan entire line for secrets (defence-in-depth) ── - # Applied only to non-definition lines to avoid duplicate SECRET - # events — definition URLs are already scanned by scan_url_for_secrets. - for finding in scan_line_for_secrets(line, self.file_path, lineno): - yield (lineno, "SECRET", finding) - # ── Alt-text: inline images ─────────────────────────────────────── for img_match in _RE_IMAGE_INLINE.finditer(line): alt_text = img_match.group(1) url = img_match.group(2) if alt_text.strip(): - yield (lineno, "IMG", (alt_text, url)) + content_events.append((lineno, "IMG", (alt_text, url))) else: - yield (lineno, "MISSING_ALT", url) + content_events.append((lineno, "MISSING_ALT", url)) + + # Yield all events in line-number order + yield from sorted(shield_events + content_events, key=lambda e: e[0]) # ── Pass 2: Cross-Check & Validation ────────────────────────────────────── diff --git a/src/zenzic/core/shield.py b/src/zenzic/core/shield.py index 77a7506..64a3e68 100644 --- a/src/zenzic/core/shield.py +++ b/src/zenzic/core/shield.py @@ -9,9 +9,13 @@ Supported patterns ------------------ -- OpenAI API key: ``sk-[a-zA-Z0-9]{48}`` -- GitHub token: ``gh[pousr]_[a-zA-Z0-9]{36}`` -- AWS access key: ``AKIA[0-9A-Z]{16}`` +- OpenAI API key: ``sk-[a-zA-Z0-9]{48}`` +- GitHub token: ``gh[pousr]_[a-zA-Z0-9]{36}`` +- AWS access key: ``AKIA[0-9A-Z]{16}`` +- Stripe live key: ``sk_live_[0-9a-zA-Z]{24}`` +- Slack token: ``xox[baprs]-[0-9a-zA-Z]{10,48}`` +- Google API key: ``AIza[0-9A-Za-z\\-_]{35}`` +- Generic private key: ``-----BEGIN [A-Z ]+ PRIVATE KEY-----`` Exit code 
contract ------------------ @@ -33,6 +37,10 @@ ("openai-api-key", re.compile(r"sk-[a-zA-Z0-9]{48}")), ("github-token", re.compile(r"gh[pousr]_[a-zA-Z0-9]{36}")), ("aws-access-key", re.compile(r"AKIA[0-9A-Z]{16}")), + ("stripe-live-key", re.compile(r"sk_live_[0-9a-zA-Z]{24}")), + ("slack-token", re.compile(r"xox[baprs]-[0-9a-zA-Z]{10,48}")), + ("google-api-key", re.compile(r"AIza[0-9A-Za-z\-_]{35}")), + ("private-key", re.compile(r"-----BEGIN [A-Z ]+ PRIVATE KEY-----")), ] diff --git a/src/zenzic/core/validator.py b/src/zenzic/core/validator.py index d1ce8cf..8c35397 100644 --- a/src/zenzic/core/validator.py +++ b/src/zenzic/core/validator.py @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: 2026 PythonWoods # SPDX-License-Identifier: Apache-2.0 -"""Validation logic: native link checking (internal + external) and Python snippet checks. +"""Validation logic: native link checking (internal + external) and snippet checks. Link validation no longer invokes any external process. Instead it uses a pure-Python two-pass approach: @@ -12,24 +12,34 @@ heading slugs extracted from the target file. 3. *External links* (``http://`` / ``https://``) are validated lazily — only when ``strict=True`` — via concurrent HEAD requests through ``httpx``. + +Snippet validation supports four languages using pure-Python parsers: + +- **Python** (``python``, ``py``) — ``compile()`` in ``exec`` mode +- **YAML** (``yaml``, ``yml``) — ``yaml.safe_load()`` +- **JSON** (``json``) — ``json.loads()`` +- **TOML** (``toml``) — ``tomllib.loads()`` (stdlib 3.11+) + +No subprocesses are spawned for any language. 
""" from __future__ import annotations import asyncio import fnmatch +import json import os import re +import tomllib from dataclasses import dataclass from pathlib import Path -from typing import Any, NamedTuple +from typing import Any from urllib.parse import urlsplit import httpx import yaml from zenzic.core.adapter import get_adapter -from zenzic.core.exceptions import ConfigurationError from zenzic.core.resolver import ( AnchorMissing, FileNotFound, @@ -51,34 +61,6 @@ class _PermissiveSafeLoader(yaml.SafeLoader): _PermissiveSafeLoader.add_multi_constructor("", lambda loader, tag_suffix, node: None) # type: ignore[no-untyped-call] -# ─── i18n Fallback Configuration ───────────────────────────────────────────── - - -class I18nFallbackConfig(NamedTuple): - """i18n fallback resolution config derived from the docs generator config. - - When ``enabled`` is ``True``, a :class:`FileNotFound` outcome for a link - whose source is under a non-default locale directory is re-checked against - the default-locale tree before an error is emitted. This mirrors the - ``fallback_to_default`` behaviour of the ``mkdocs-i18n`` plugin. - - Attributes: - enabled: ``True`` when ``fallback_to_default`` is active in the config. - default_locale: Locale string of the default language (e.g. ``"en"``). - locale_dirs: Frozenset of non-default locale directory names - (e.g. ``frozenset({"it", "fr"})``). - """ - - enabled: bool - default_locale: str - locale_dirs: frozenset[str] - - -_I18N_FALLBACK_DISABLED: I18nFallbackConfig = I18nFallbackConfig( - enabled=False, default_locale="", locale_dirs=frozenset() -) - - # ─── Regexes ────────────────────────────────────────────────────────────────── # Matches inline Markdown links [text](url) and images ![alt](url). 
@@ -217,139 +199,6 @@ def anchors_in_file(content: str) -> set[str]: return {slug_heading(m.group(1)) for m in _HEADING_RE.finditer(content)} -# ─── i18n fallback pure helpers ────────────────────────────────────────────── - - -def _extract_i18n_fallback_config(doc_config: dict[str, Any]) -> I18nFallbackConfig: - """Extract i18n fallback config from a parsed docs generator config dict. - - Returns :data:`_I18N_FALLBACK_DISABLED` for any configuration that does - not use folder-based i18n with ``fallback_to_default: true``. - - Args: - doc_config: Parsed YAML config dict (e.g. from ``mkdocs.yml``). - - Returns: - :class:`I18nFallbackConfig` describing the fallback settings. - - Raises: - :class:`~zenzic.core.exceptions.ConfigurationError`: When - ``fallback_to_default: true`` is set in folder mode but no - language entry has ``default: true``. Zenzic cannot infer - the fallback target locale. - """ - plugins = doc_config.get("plugins") - if not isinstance(plugins, list): - return _I18N_FALLBACK_DISABLED - for plugin in plugins: - if not isinstance(plugin, dict): - continue - i18n = plugin.get("i18n") - if not isinstance(i18n, dict): - continue - if i18n.get("docs_structure") != "folder": - break - if not i18n.get("fallback_to_default", False): - return _I18N_FALLBACK_DISABLED - # fallback_to_default: true — locate the default locale - default_locale = "" - locale_dirs: set[str] = set() - for lang in i18n.get("languages") or []: - if not isinstance(lang, dict): - continue - locale = lang.get("locale", "") - if not locale: - continue - if lang.get("default", False): - default_locale = locale - else: - locale_dirs.add(locale) - # Treat null/empty languages as "not configured" — null-safety guard. - # Only raise ConfigurationError when languages is a non-empty list - # but none of its entries declares default: true. 
- if not locale_dirs and not default_locale: - return _I18N_FALLBACK_DISABLED - if not default_locale: - raise ConfigurationError( - "i18n plugin has fallback_to_default: true but no language with " - "default: true — Zenzic cannot determine the fallback target locale.", - context={"docs_structure": "folder", "fallback_to_default": True}, - ) - return I18nFallbackConfig( - enabled=True, - default_locale=default_locale, - locale_dirs=frozenset(locale_dirs), - ) - return _I18N_FALLBACK_DISABLED - - -def _should_suppress_via_i18n_fallback( - asset_str: str, - source_file: Path, - docs_root: Path, - href: str, - fallback: I18nFallbackConfig, - resolver: InMemoryPathResolver, - known_assets: frozenset[str], -) -> bool: - """Return ``True`` if a :class:`FileNotFound` is covered by i18n fallback. - - Mirrors the ``fallback_to_default`` behaviour of the MkDocs i18n plugin: - when a translated file is absent, the build serves the default-locale - version. Zenzic suppresses the error when the link would resolve - correctly if the source file were in the default-locale tree. - - The check applies only when the resolved missing path is *inside* the - locale sub-tree (e.g. ``docs/it/api.md``). Links that already navigate - out of the locale dir at the Markdown level (e.g. ``../api.md`` which - normalises to ``docs/api.md``) are :class:`Resolved` directly and never - reach this function. - - Args: - asset_str: Normalised absolute path string of the missing target. - source_file: Absolute path of the file containing the link. - docs_root: Absolute documentation root directory. - href: Original href string (used for re-resolution from default root). - fallback: Fallback config extracted from ``mkdocs.yml``. - resolver: In-memory resolver instance (for ``.md`` fallback lookup). - known_assets: Pre-built frozenset of non-``.md`` asset paths. - - Returns: - ``True`` if the error should be suppressed; ``False`` otherwise. 
- """ - if not fallback.enabled: - return False - source_rel = source_file.relative_to(docs_root) - if not source_rel.parts or source_rel.parts[0] not in fallback.locale_dirs: - return False - - locale = source_rel.parts[0] - locale_prefix = str(docs_root) + os.sep + locale + os.sep - - # Fallback only applies when the missing target is inside the locale tree. - if not asset_str.startswith(locale_prefix): - return False - - stripped = asset_str[len(locale_prefix) :] - fallback_str = os.path.normpath(str(docs_root) + os.sep + stripped) - - # Non-.md assets: check the pre-built known_assets frozenset. - if fallback_str in known_assets: - return True - - # .md files: re-resolve from the default-locale equivalent source. - # _build_target uses only source_file.parent — the virtual source does - # not need to exist on disk. - rest = source_rel.parts[1:] - if not rest: - return False - default_source = docs_root / Path(*rest) - match resolver.resolve(default_source, href): - case Resolved(): - return True - return False - - # ─── Reference link pure helpers (S4-4) ────────────────────────────────────── @@ -672,7 +521,14 @@ async def validate_links_async( internal_errors.append( f"{label}:{lineno}: '{path_part}' not found in docs" ) - case AnchorMissing(path_part=path_part, anchor=anchor): + case AnchorMissing(path_part=path_part, anchor=anchor, resolved_file=resolved_file): + # Mirror the FileNotFound i18n fallback: when a locale file + # exists but lacks the anchor (because headings are translated), + # suppress the error if the anchor is present in the + # default-locale equivalent file. The build engine serves the + # default-locale page for this anchor at build time. 
+ if adapter.resolve_anchor(resolved_file, anchor, anchors_cache, docs_root): + continue internal_errors.append( f"{label}:{lineno}: anchor '#{anchor}' not found in '{path_part}'" ) @@ -835,22 +691,26 @@ def validate_links(repo_root: Path, *, strict: bool = False) -> list[str]: return asyncio.run(validate_links_async(repo_root, strict=strict)) -# ─── Python snippet validation ──────────────────────────────────────────────── +# ─── Multi-language snippet validation ──────────────────────────────────────── +_VALIDATABLE_LANGS = frozenset({"python", "py", "yaml", "yml", "json", "toml"}) -def _extract_python_blocks(text: str) -> list[tuple[str, int]]: - """Return (snippet, fence_line_no) pairs for every fenced Python block in *text*. +def _extract_code_blocks(text: str) -> list[tuple[str, str, int]]: + """Return (lang, snippet, fence_line_no) triples for every validatable fenced block. + + Only blocks whose language tag is in ``_VALIDATABLE_LANGS`` are returned. Uses a deterministic line-by-line state machine rather than a regex so that - inline triple-backtick code spans (e.g. `` ` ```python ` ``) cannot cause the - matcher to run away across the rest of the file. + inline triple-backtick code spans (e.g. `` ` ```python ` ``) cannot cause + the matcher to run away across the rest of the file. *fence_line_no* is the 1-based line number of the opening fence. The closing fence must be a line whose stripped content is exactly three or more backticks (per CommonMark §4.5). 
""" - blocks: list[tuple[str, int]] = [] + blocks: list[tuple[str, str, int]] = [] in_block = False + current_lang = "" block_lines: list[str] = [] fence_line_no = 0 @@ -860,14 +720,15 @@ def _extract_python_blocks(text: str) -> list[tuple[str, int]]: if stripped.startswith("```"): info = stripped[3:].strip() lang = info.split()[0].lower() if info else "" - if lang in ("python", "py"): + if lang in _VALIDATABLE_LANGS: in_block = True + current_lang = lang block_lines = [] fence_line_no = lineno else: # Closing fence: line is only backtick characters (at least 3) if stripped.startswith("```") and not stripped.lstrip("`"): - blocks.append(("\n".join(block_lines), fence_line_no)) + blocks.append((current_lang, "\n".join(block_lines), fence_line_no)) in_block = False block_lines = [] else: @@ -881,7 +742,14 @@ def check_snippet_content( file_path: Path | str, config: ZenzicConfig | None = None, ) -> list[SnippetError]: - """Pure function: compile Python fenced code blocks in text. No I/O. + """Pure function: validate fenced code blocks in text using pure-Python parsers. No I/O. + + Supported languages: + + - **Python** (``python``, ``py``) — ``compile()`` in ``exec`` mode + - **YAML** (``yaml``, ``yml``) — ``yaml.safe_load()`` + - **JSON** (``json``) — ``json.loads()`` + - **TOML** (``toml``) — ``tomllib.loads()`` Args: text: Raw markdown content to analyse. @@ -889,7 +757,7 @@ def check_snippet_content( config: Optional Zenzic configuration. Returns: - List of SnippetError instances for each invalid Python code block. + List of SnippetError instances for each invalid code block. 
""" if config is None: config = ZenzicConfig() @@ -897,28 +765,65 @@ def check_snippet_content( path = Path(file_path) errors: list[SnippetError] = [] - for snippet, fence_line in _extract_python_blocks(text): + for lang, snippet, fence_line in _extract_code_blocks(text): if len(snippet.strip().splitlines()) < config.snippet_min_lines: continue - try: - compile(snippet, str(path), "exec") - except SyntaxError as exc: - errors.append( - SnippetError( - file_path=path, - line_no=fence_line + (exc.lineno or 1), - message=f"SyntaxError in Python snippet — {exc.msg}", + if lang in ("python", "py"): + try: + compile(snippet, str(path), "exec") + except SyntaxError as exc: + errors.append( + SnippetError( + file_path=path, + line_no=fence_line + (exc.lineno or 1), + message=f"SyntaxError in Python snippet — {exc.msg}", + ) ) - ) - except Exception as exc: - errors.append( - SnippetError( - file_path=path, - line_no=fence_line + 1, - message=f"ParserError in Python snippet — {type(exc).__name__}: {exc}", + except Exception as exc: + errors.append( + SnippetError( + file_path=path, + line_no=fence_line + 1, + message=f"ParserError in Python snippet — {type(exc).__name__}: {exc}", + ) + ) + + elif lang in ("yaml", "yml"): + try: + yaml.safe_load(snippet) + except yaml.YAMLError as exc: + errors.append( + SnippetError( + file_path=path, + line_no=fence_line + 1, + message=f"SyntaxError in YAML snippet — {exc}", + ) + ) + + elif lang == "json": + try: + json.loads(snippet) + except json.JSONDecodeError as exc: + errors.append( + SnippetError( + file_path=path, + line_no=fence_line + exc.lineno, + message=f"SyntaxError in JSON snippet — {exc.msg}", + ) + ) + + elif lang == "toml": + try: + tomllib.loads(snippet) + except tomllib.TOMLDecodeError as exc: + errors.append( + SnippetError( + file_path=path, + line_no=fence_line + 1, + message=f"SyntaxError in TOML snippet — {exc}", + ) ) - ) return errors @@ -1027,7 +932,7 @@ def validate(self) -> list[str]: def 
validate_snippets(repo_root: Path, config: ZenzicConfig | None = None) -> list[SnippetError]: - """Compile every Python fenced code block in docs and report syntax errors. + """Validate every fenced code block (Python, YAML, JSON, TOML) in docs and report syntax errors. Args: repo_root: Path to the repository root. diff --git a/src/zenzic/models/config.py b/src/zenzic/models/config.py index ffdcb2e..47cd4a6 100644 --- a/src/zenzic/models/config.py +++ b/src/zenzic/models/config.py @@ -180,6 +180,23 @@ class ZenzicConfig(BaseModel): "The --fail-under CLI flag overrides this value when explicitly provided." ), ) + strict: bool = Field( + default=False, + description=( + "When True, treat warnings as errors and validate external URLs via network " + "requests. Equivalent to passing --strict on every check all / score / diff " + "invocation. The --strict CLI flag overrides this value for a single run." + ), + ) + exit_zero: bool = Field( + default=False, + description=( + "When True, zenzic check all always exits with code 0 even when issues are " + "found. Issues are still printed and scored. Useful for observation-only " + "pipelines where you want visibility without blocking. " + "The --exit-zero CLI flag overrides this value for a single run." 
+ ), + ) custom_rules: list[CustomRuleConfig] = Field( default=[], description=( diff --git a/tests/test_references.py b/tests/test_references.py index 05454f6..ecba885 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -361,6 +361,50 @@ def test_harvest_case_insensitive_normalisation(self, tmp_path: Path) -> None: assert scanner.ref_map.resolve("myref") == "https://example.com" assert scanner.ref_map.resolve("MYREF") == "https://example.com" + def test_shield_detects_secret_in_unlabelled_fence(self, tmp_path: Path) -> None: + """A credential inside an unlabelled ``` block must be caught by the Shield.""" + aws_key = "AKIA" + "Z" * 16 + content = f"Some prose.\n```\nexport AWS_KEY={aws_key}\n```\n" + md = self._write_md(tmp_path, content) + scanner = ReferenceScanner(md) + events = list(scanner.harvest()) + secret_events = [e for e in events if e[1] == "SECRET"] + assert len(secret_events) == 1 + assert secret_events[0][2].secret_type == "aws-access-key" + + def test_shield_detects_secret_in_bash_fence(self, tmp_path: Path) -> None: + """A credential inside a ```bash block must be caught by the Shield.""" + stripe_key = "sk_live_" + "X" * 24 + content = f"Example:\n```bash\nexport STRIPE_KEY={stripe_key}\n```\n" + md = self._write_md(tmp_path, content) + scanner = ReferenceScanner(md) + events = list(scanner.harvest()) + secret_events = [e for e in events if e[1] == "SECRET"] + assert len(secret_events) == 1 + assert secret_events[0][2].secret_type == "stripe-live-key" + + def test_shield_fenced_secret_does_not_create_ref_definition(self, tmp_path: Path) -> None: + """A credential inside a fence fires the Shield but must NOT be harvested as a ref-def.""" + github_token = "ghp_" + "B" * 36 + content = f"```\n[secret_ref]: https://example.com/{github_token}\n```\n" + md = self._write_md(tmp_path, content) + scanner = ReferenceScanner(md) + events = list(scanner.harvest()) + # Shield must fire + secret_events = [e for e in events if e[1] == 
"SECRET"] + assert len(secret_events) == 1 + assert secret_events[0][2].secret_type == "github-token" + # The ref-def must NOT be added to the map (it's inside a fence) + assert "secret_ref" not in scanner.ref_map + + def test_shield_clean_code_block_no_findings(self, tmp_path: Path) -> None: + """A code block with no credentials must not produce SECRET events.""" + content = "```bash\nexport DATABASE_URL=postgres://localhost/mydb\n```\n" + md = self._write_md(tmp_path, content) + scanner = ReferenceScanner(md) + events = list(scanner.harvest()) + assert not any(e[1] == "SECRET" for e in events) + # ══════════════════════════════════════════════════════════════════════════════ # ReferenceScanner — cross_check (Pass 2) diff --git a/tests/test_validator.py b/tests/test_validator.py index d0e5159..cae2e14 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -11,9 +11,7 @@ from zenzic.core.validator import ( _MAX_CONCURRENT_REQUESTS, - I18nFallbackConfig, _build_ref_map, - _extract_i18n_fallback_config, anchors_in_file, extract_links, extract_ref_links, @@ -445,99 +443,6 @@ def _mkdocs_i18n_folder(fallback: bool = True) -> str: ) -class TestI18nFallbackConfig: - """Unit tests for _extract_i18n_fallback_config (pure function).""" - - def test_no_plugin_returns_disabled(self) -> None: - assert _extract_i18n_fallback_config({}) == I18nFallbackConfig( - enabled=False, default_locale="", locale_dirs=frozenset() - ) - - def test_suffix_mode_returns_disabled(self) -> None: - cfg = { - "plugins": [ - { - "i18n": { - "docs_structure": "suffix", - "fallback_to_default": True, - "languages": [{"locale": "en", "default": True}], - } - } - ] - } - result = _extract_i18n_fallback_config(cfg) - assert not result.enabled - - def test_folder_mode_fallback_false_returns_disabled(self) -> None: - cfg = { - "plugins": [ - { - "i18n": { - "docs_structure": "folder", - "fallback_to_default": False, - "languages": [{"locale": "en", "default": True}, {"locale": "it"}], - } - 
} - ] - } - assert not _extract_i18n_fallback_config(cfg).enabled - - def test_folder_mode_fallback_true_returns_enabled(self) -> None: - cfg = { - "plugins": [ - { - "i18n": { - "docs_structure": "folder", - "fallback_to_default": True, - "languages": [ - {"locale": "en", "default": True}, - {"locale": "it"}, - {"locale": "fr"}, - ], - } - } - ] - } - result = _extract_i18n_fallback_config(cfg) - assert result.enabled is True - assert result.default_locale == "en" - assert result.locale_dirs == frozenset({"it", "fr"}) - - def test_fallback_true_no_default_locale_raises(self) -> None: - """fallback_to_default: true with no default: true language must raise ConfigurationError.""" - from zenzic.core.exceptions import ConfigurationError - - cfg = { - "plugins": [ - { - "i18n": { - "docs_structure": "folder", - "fallback_to_default": True, - "languages": [{"locale": "en"}, {"locale": "it"}], - } - } - ] - } - with pytest.raises(ConfigurationError, match="fallback_to_default"): - _extract_i18n_fallback_config(cfg) - - def test_null_safe(self) -> None: - assert not _extract_i18n_fallback_config({"plugins": None}).enabled - assert not _extract_i18n_fallback_config( - { - "plugins": [ - { - "i18n": { - "docs_structure": "folder", - "fallback_to_default": True, - "languages": None, - } - } - ] - } - ).enabled - - class TestI18nFallbackIntegration: """Integration tests: validate_links with i18n fallback semantics.""" @@ -616,6 +521,69 @@ def test_config_error_no_default_locale(self, tmp_path: Path) -> None: with pytest.raises(ConfigurationError): validate_links(tmp_path) + # ── Anchor i18n fallback tests ───────────────────────────────────────────── + + def test_anchor_fallback_suppresses_translated_heading_miss(self, tmp_path: Path) -> None: + """Anchor present in EN file but absent in IT file is suppressed by fallback. + + Scenario: docs/it/guide.md links to architecture.md#quick-start. + The resolver normalises the target to docs/it/architecture.md (exists). 
+ docs/it/architecture.md uses translated headings, so "quick-start" is + absent from its anchor set — AnchorMissing is produced. + Fallback checks docs/architecture.md, finds "quick-start", and suppresses. + """ + repo, docs, docs_it = self._setup(tmp_path) + # EN file has the target anchor; IT file has a translated heading. + (docs / "architecture.md").write_text("# Architecture\n\n## Quick Start\n") + (docs_it / "architecture.md").write_text("# Architettura\n\n## Avvio Rapido\n") + (docs_it / "guide.md").write_text("[qs](architecture.md#quick-start)\n") + assert validate_links(repo) == [] + + def test_anchor_fallback_reports_when_anchor_missing_in_both_locales( + self, tmp_path: Path + ) -> None: + """An anchor absent in BOTH the IT and EN files is always reported. + + Fallback can only suppress the error when the EN file contains the anchor. + If neither locale has it, the link is genuinely broken. + """ + repo, docs, docs_it = self._setup(tmp_path) + (docs / "architecture.md").write_text("# Architecture\n\n## Overview\n") + (docs_it / "architecture.md").write_text("# Architettura\n\n## Panoramica\n") + # "ghost-anchor" exists in neither EN nor IT file. 
+ (docs_it / "guide.md").write_text("[x](architecture.md#ghost-anchor)\n") + errors = validate_links(repo) + assert any("ghost-anchor" in e for e in errors) + + def test_anchor_fallback_disabled_reports_translated_heading_miss(self, tmp_path: Path) -> None: + """When fallback_to_default is false, translated-heading misses are reported.""" + repo, docs, docs_it = self._setup(tmp_path, fallback=False) + (docs / "architecture.md").write_text("# Architecture\n\n## Quick Start\n") + (docs_it / "architecture.md").write_text("# Architettura\n\n## Avvio Rapido\n") + (docs_it / "guide.md").write_text("[qs](architecture.md#quick-start)\n") + errors = validate_links(repo) + assert any("quick-start" in e for e in errors) + + def test_anchor_fallback_not_triggered_for_en_file_anchor_miss(self, tmp_path: Path) -> None: + """A broken anchor in a default-locale (EN) file is always reported. + + Fallback suppression only applies when the source file is inside a + non-default locale directory. A link from docs/guide.md to + docs/page.md#ghost must still be reported even with fallback enabled. 
+ """ + repo, docs, docs_it = self._setup(tmp_path) + (docs / "page.md").write_text("# Page\n\n## Real Heading\n") + (docs / "guide.md").write_text("[x](page.md#ghost-anchor)\n") + errors = validate_links(repo) + assert any("ghost-anchor" in e for e in errors) + + def test_anchor_in_it_file_resolves_directly_without_fallback(self, tmp_path: Path) -> None: + """An anchor that exists in the IT file itself resolves without touching fallback.""" + repo, docs, docs_it = self._setup(tmp_path) + (docs_it / "architecture.md").write_text("# Architettura\n\n## Avvio Rapido\n") + (docs_it / "guide.md").write_text("[ar](architecture.md#avvio-rapido)\n") + assert validate_links(repo) == [] + # ─── S4-4: Reference-style link resolution ─────────────────────────────────── @@ -879,3 +847,80 @@ def test_validate_snippets_generic_exception_reported(tmp_path: Path) -> None: errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) assert len(errors) == 1 assert "ParserError" in errors[0].message + + +# ─── YAML snippet validation ────────────────────────────────────────────────── + + +def test_validate_snippets_yaml_valid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("```yaml\nkey: value\nlist:\n - a\n - b\n```\n") + assert validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) == [] + + +def test_validate_snippets_yaml_invalid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("```yaml\nkey: [\nunclosed bracket\n```\n") + errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) + assert len(errors) == 1 + assert "SyntaxError in YAML snippet" in errors[0].message + + +def test_validate_snippets_yml_alias_invalid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("```yml\n: bad mapping\n```\n") + errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) + assert len(errors) == 1 + assert 
"SyntaxError in YAML snippet" in errors[0].message + + +# ─── JSON snippet validation ────────────────────────────────────────────────── + + +def test_validate_snippets_json_valid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text('```json\n{"key": "value", "num": 42}\n```\n') + assert validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) == [] + + +def test_validate_snippets_json_invalid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text('```json\n{"key": "value",}\n```\n') + errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) + assert len(errors) == 1 + assert "SyntaxError in JSON snippet" in errors[0].message + + +def test_validate_snippets_json_line_number(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + # fence opens at line 3 (two preceding lines), error is on line 2 of snippet + (docs / "page.md").write_text("# Page\n\n```json\n{\n bad\n}\n```\n") + errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) + assert len(errors) == 1 + # fence_line=3, json error lineno=2 → reported line 5 + assert errors[0].line_no == 5 + + +# ─── TOML snippet validation ────────────────────────────────────────────────── + + +def test_validate_snippets_toml_valid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text('```toml\ntitle = "Zenzic"\nversion = "0.4.0"\n```\n') + assert validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) == [] + + +def test_validate_snippets_toml_invalid(tmp_path: Path) -> None: + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("```toml\ntitle = Zenzic # missing quotes\n```\n") + errors = validate_snippets(tmp_path, ZenzicConfig(snippet_min_lines=1)) + assert len(errors) == 1 + assert "SyntaxError in TOML snippet" in errors[0].message diff --git a/uv.lock b/uv.lock index c4428f9..a37b55f 100644 --- a/uv.lock +++ 
b/uv.lock @@ -2035,7 +2035,7 @@ wheels = [ [[package]] name = "zenzic" -version = "0.4.0rc2" +version = "0.4.0rc3" source = { editable = "." } dependencies = [ { name = "httpx" }, @@ -2045,6 +2045,14 @@ dependencies = [ { name = "typer" }, ] +[package.optional-dependencies] +docs = [ + { name = "mkdocs-material", extra = ["imaging"] }, + { name = "mkdocs-minify-plugin" }, + { name = "mkdocs-static-i18n" }, + { name = "mkdocstrings", extra = ["python"] }, +] + [package.dev-dependencies] dev = [ { name = "bump-my-version" }, @@ -2063,15 +2071,43 @@ dev = [ { name = "ruff" }, { name = "types-pyyaml" }, ] +docs = [ + { name = "mkdocs-material", extra = ["imaging"] }, + { name = "mkdocs-minify-plugin" }, + { name = "mkdocs-static-i18n" }, + { name = "mkdocstrings", extra = ["python"] }, + { name = "requests" }, +] +lint = [ + { name = "mypy" }, + { name = "pre-commit" }, + { name = "reuse" }, + { name = "ruff" }, + { name = "types-pyyaml" }, +] +release = [ + { name = "bump-my-version" }, + { name = "nox" }, + { name = "pip-audit" }, +] +test = [ + { name = "pytest" }, + { name = "pytest-cov" }, +] [package.metadata] requires-dist = [ { name = "httpx", specifier = ">=0.27" }, + { name = "mkdocs-material", extras = ["imaging"], marker = "extra == 'docs'", specifier = ">=9.0.0" }, + { name = "mkdocs-minify-plugin", marker = "extra == 'docs'", specifier = ">=0.7.0" }, + { name = "mkdocs-static-i18n", marker = "extra == 'docs'", specifier = ">=1.3.1" }, + { name = "mkdocstrings", extras = ["python"], marker = "extra == 'docs'", specifier = ">=0.24.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pyyaml", specifier = ">=6.0.0" }, { name = "rich", specifier = ">=13.0.0" }, { name = "typer", specifier = ">=0.9.0" }, ] +provides-extras = ["docs"] [package.metadata.requires-dev] dev = [ @@ -2091,3 +2127,26 @@ dev = [ { name = "ruff", specifier = ">=0.3.0" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, ] +docs = [ + { name = "mkdocs-material", extras 
= ["imaging"], specifier = ">=9.0.0" }, + { name = "mkdocs-minify-plugin", specifier = ">=0.7.0" }, + { name = "mkdocs-static-i18n", specifier = ">=1.3.1" }, + { name = "mkdocstrings", extras = ["python"], specifier = ">=0.24.0" }, + { name = "requests", specifier = ">=2.33.0" }, +] +lint = [ + { name = "mypy", specifier = ">=1.5.0" }, + { name = "pre-commit", specifier = ">=3.0.0" }, + { name = "reuse", specifier = ">=5.0.2" }, + { name = "ruff", specifier = ">=0.3.0" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, +] +release = [ + { name = "bump-my-version", specifier = ">=1.2.6" }, + { name = "nox", specifier = ">=2024.4.15" }, + { name = "pip-audit", specifier = ">=2.7.0" }, +] +test = [ + { name = "pytest", specifier = ">=8.0.0" }, + { name = "pytest-cov", specifier = ">=4.1.0" }, +]