diff --git a/CHANGELOG.md b/CHANGELOG.md index 0301583..b10a658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,11 @@ Versions follow [Semantic Versioning](https://semver.org/). (Priority 2) → built-in defaults (Priority 3). If both files exist, `zenzic.toml` wins unconditionally. +- **`plugins` config key** (`zenzic.toml` / `[tool.zenzic]`) — + `ZenzicConfig.plugins` now exposes an explicit allow-list of external + rule plugin entry-point names to activate during scanning. Core rules + remain always enabled. + - **`scan_docs_references` `verbose` flag** — new keyword-only parameter `verbose: bool = False`. When `True`, prints a one-line performance telemetry summary to stderr after the scan: engine mode (Sequential or @@ -143,6 +148,11 @@ Versions follow [Semantic Versioning](https://semver.org/). --- +## 0.4.x (abandoned) + +This release cycle was exploratory and included multiple breaking changes. +It has been superseded by the 0.5.x stabilization cycle. + ## [0.4.0-rc4] — 2026-04-01 — Ghost Route Support, VSM Rule Engine & Content-Addressable Cache ## [0.4.0-rc5] — 2026-04-01 — The Sync Sprint: Zensical v0.0.31+ & Parallel API diff --git a/README.it.md b/README.it.md index c14848e..15effd1 100644 --- a/README.it.md +++ b/README.it.md @@ -262,6 +262,10 @@ non segnalare mai i file tradotti come orfani. > Il changelog è ora mantenuto in un unico file inglese (`CHANGELOG.md`). > Questa scelta segue gli standard dell'ecosistema Python open source: > la cronologia delle versioni è documentazione tecnica, non interfaccia utente. +> +> Nota sul ciclo release: la linea `0.4.x` è stata abbandonata (fase +> esplorativa con breaking changes multipli); la linea attiva di +> stabilizzazione è `0.5.x`. 
--- diff --git a/README.md b/README.md index 480aeab..2a72594 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ absolute links are a hard error, and if you declare `engine = "zensical"` you mu - **`PluginContractError`**: new exception for rule contract violations. - **Plugin documentation**: `docs/developers/plugins.md` (EN + IT) — full contract, packaging instructions, and `pyproject.toml` registration examples. +- **Release-track clarification**: the 0.4.x cycle is considered abandoned + (exploratory with repeated breaking changes); 0.5.x is the active + stabilization line. --- diff --git a/docs/about/index.md b/docs/about/index.md index bf6aa71..87cf834 100644 --- a/docs/about/index.md +++ b/docs/about/index.md @@ -47,4 +47,13 @@ Built by [PythonWoods](https://github.com/PythonWoods), it is designed to run in [:lucide-arrow-right: Open](https://github.com/PythonWoods/zenzic) +- :lucide-history:   __Changelog__ + + --- + + Full release history and current release track policy. + The 0.4.x line is abandoned; 0.5.x is the active stabilization cycle. + + [:lucide-arrow-right: Read](https://github.com/PythonWoods/zenzic/blob/main/CHANGELOG.md) + diff --git a/docs/it/about/index.md b/docs/it/about/index.md index 7047639..7588564 100644 --- a/docs/it/about/index.md +++ b/docs/it/about/index.md @@ -47,4 +47,13 @@ Sviluppato da [PythonWoods](https://github.com/PythonWoods), è progettato per e [:lucide-arrow-right: Apri](https://github.com/PythonWoods/zenzic) +- :lucide-history:   __Changelog__ + + --- + + Storico completo delle release e policy della linea attiva. + La linea 0.4.x è stata abbandonata; la stabilizzazione attiva è 0.5.x. 
+ + [:lucide-arrow-right: Leggi](https://github.com/PythonWoods/zenzic/blob/main/CHANGELOG.md) + diff --git a/src/zenzic/core/rules.py b/src/zenzic/core/rules.py index f28cb37..6307379 100644 --- a/src/zenzic/core/rules.py +++ b/src/zenzic/core/rules.py @@ -71,6 +71,8 @@ if TYPE_CHECKING: + from importlib.metadata import EntryPoint + from zenzic.models.vsm import VSM, Route @@ -684,10 +686,7 @@ def _to_canonical_url(href: str) -> str | None: # ─── Plugin discovery ───────────────────────────────────────────────────────── -from dataclasses import dataclass as _dc # noqa: E402 — module-level, after all classes - - -@_dc +@dataclass(slots=True) class PluginRuleInfo: """Metadata about a discovered plugin rule. @@ -705,6 +704,118 @@ class PluginRuleInfo: origin: str +class PluginRegistry: + """Registry wrapper around ``importlib.metadata`` entry-points. + + Provides read-only discovery for the CLI and explicit rule loading for the + scanner. Discovery is best-effort; loading configured plugins is strict. 
+ """ + + def __init__(self, group: str = "zenzic.rules") -> None: + self._group = group + + def _entry_points(self) -> list[EntryPoint]: + """Return sorted entry-points for the configured group.""" + from importlib.metadata import entry_points + + return sorted(entry_points(group=self._group), key=lambda ep: ep.name) + + def list_rules(self) -> list[PluginRuleInfo]: + """Discover all plugin rules as metadata for CLI inspection.""" + results: list[PluginRuleInfo] = [] + for ep in self._entry_points(): + try: + cls = ep.load() + instance = cls() + if not isinstance(instance, BaseRule): + continue + except Exception: # noqa: BLE001 + continue + dist_name = ep.dist.name if ep.dist is not None else "zenzic" + results.append( + PluginRuleInfo( + rule_id=instance.rule_id, + class_name=f"{cls.__module__}.{cls.__qualname__}", + source=ep.name, + origin=dist_name, + ) + ) + if not any(r.source == "broken-links" for r in results): + results.append( + PluginRuleInfo( + rule_id=VSMBrokenLinkRule().rule_id, + class_name=f"{VSMBrokenLinkRule.__module__}.{VSMBrokenLinkRule.__qualname__}", + source="broken-links", + origin="zenzic", + ) + ) + # Keep ordering deterministic regardless of fallback insertion order. + results.sort(key=lambda r: r.source) + return results + + def load_core_rules(self) -> list[BaseRule]: + """Load core rules registered by the ``zenzic`` distribution.""" + core_eps = [ + ep for ep in self._entry_points() if ep.dist is not None and ep.dist.name == "zenzic" + ] + loaded = [self._load_entry_point(ep) for ep in core_eps] + if not any(rule.rule_id == "Z001" for rule in loaded): + loaded.append(VSMBrokenLinkRule()) + return loaded + + def load_selected_rules(self, plugin_ids: Sequence[str]) -> list[BaseRule]: + """Load only the configured plugin IDs from the entry-point group. + + Args: + plugin_ids: Entry-point names declared in ``config.plugins``. + + Raises: + PluginContractError: If a configured plugin is missing or invalid. 
+ """ + from zenzic.core.exceptions import PluginContractError # deferred: avoid circular import + + requested = [pid.strip() for pid in plugin_ids if pid.strip()] + if not requested: + return [] + + eps_by_name = {ep.name: ep for ep in self._entry_points()} + if "broken-links" in requested and "broken-links" not in eps_by_name: + requested = [pid for pid in requested if pid != "broken-links"] + return [VSMBrokenLinkRule(), *self.load_selected_rules(requested)] + + missing = sorted(set(requested) - set(eps_by_name)) + if missing: + raise PluginContractError( + "Configured plugin rule IDs were not found in the 'zenzic.rules' " + f"entry-point group: {', '.join(missing)}" + ) + + loaded: list[BaseRule] = [] + for pid in requested: + loaded.append(self._load_entry_point(eps_by_name[pid])) + return loaded + + @staticmethod + def _load_entry_point(ep: EntryPoint) -> BaseRule: + """Load and instantiate one entry-point as a :class:`BaseRule`.""" + from zenzic.core.exceptions import PluginContractError # deferred: avoid circular import + + try: + cls = ep.load() + instance = cls() + except Exception as exc: # noqa: BLE001 + raise PluginContractError( + f"Failed to load plugin rule '{ep.name}': {type(exc).__name__}: {exc}" + ) from exc + + if not isinstance(instance, BaseRule): + raise PluginContractError( + f"Plugin rule '{ep.name}' must instantiate a BaseRule, got " + f"{type(instance).__qualname__}." + ) + return instance + + def list_plugin_rules() -> list[PluginRuleInfo]: """Return metadata for every rule registered in the ``zenzic.rules`` group. @@ -717,24 +828,4 @@ def list_plugin_rules() -> list[PluginRuleInfo]: Returns: Sorted list of :class:`PluginRuleInfo`, ordered by ``source`` name. 
""" - from importlib.metadata import entry_points - - results: list[PluginRuleInfo] = [] - eps = entry_points(group="zenzic.rules") - for ep in eps: - try: - cls = ep.load() - instance: BaseRule = cls() - rid = instance.rule_id - except Exception: # noqa: BLE001 - continue - dist_name = ep.dist.name if ep.dist is not None else "zenzic" - results.append( - PluginRuleInfo( - rule_id=rid, - class_name=f"{cls.__module__}.{cls.__qualname__}", - source=ep.name, - origin=dist_name, - ) - ) - return sorted(results, key=lambda r: r.source) + return PluginRegistry().list_rules() diff --git a/src/zenzic/core/scanner.py b/src/zenzic/core/scanner.py index 7f60891..da4e8c8 100644 --- a/src/zenzic/core/scanner.py +++ b/src/zenzic/core/scanner.py @@ -24,7 +24,7 @@ from urllib.parse import unquote from zenzic.core.adapter import get_adapter -from zenzic.core.rules import AdaptiveRuleEngine +from zenzic.core.rules import AdaptiveRuleEngine, BaseRule from zenzic.core.shield import SecurityFinding, scan_line_for_secrets, scan_url_for_secrets from zenzic.core.validator import LinkValidator from zenzic.models.config import ZenzicConfig @@ -743,14 +743,24 @@ def _iter_md_files( def _build_rule_engine(config: ZenzicConfig) -> AdaptiveRuleEngine | None: """Construct a :class:`~zenzic.core.rules.AdaptiveRuleEngine` from the config. - Returns ``None`` when no custom rules are configured, avoiding the - overhead of engine construction on projects that do not use the feature. + Load order is deterministic: + + 1. Core rules registered by Zenzic itself (always enabled). + 2. Regex rules from ``[[custom_rules]]``. + 3. External plugin rules explicitly listed in ``plugins = [...]``. + + Returns ``None`` when no rules are available. 
""" - from zenzic.core.rules import CustomRule # deferred to keep import graph clean + from zenzic.core.rules import CustomRule, PluginRegistry # deferred to keep import graph clean - if not config.custom_rules: + # In this per-file pipeline, core VSM-only rules are no-op. Avoid building + # an engine (and avoid extra read_text calls) when no effective rules exist. + if not config.custom_rules and not config.plugins: return None - rules = [ + + registry = PluginRegistry() + rules = registry.load_core_rules() + rules.extend( CustomRule( id=cr.id, pattern=cr.pattern, @@ -758,8 +768,22 @@ def _build_rule_engine(config: ZenzicConfig) -> AdaptiveRuleEngine | None: severity=cr.severity, ) for cr in config.custom_rules - ] - return AdaptiveRuleEngine(rules) + ) + rules.extend(registry.load_selected_rules(config.plugins)) + + # Deduplicate by rule_id while preserving declaration priority. + deduped: list[BaseRule] = [] + seen: set[str] = set() + for rule in rules: + rid = rule.rule_id + if rid in seen: + continue + seen.add(rid) + deduped.append(rule) + + if not deduped: + return None + return AdaptiveRuleEngine(deduped) def _emit_telemetry(*, mode: str, workers: int, n_files: int, elapsed: float) -> None: @@ -826,8 +850,8 @@ def scan_docs_references( The threshold default (50 files) is a conservative heuristic: below it, ``ProcessPoolExecutor`` spawn overhead (~200–400 ms on a cold interpreter) - exceeds the parallelism benefit. Override with ``workers=N`` to force a - specific pool size regardless of file count. + exceeds the parallelism benefit. Override with ``workers=N`` to select a + specific pool size when parallel mode is active. **Determinism guarantee:** results are always sorted by ``file_path`` regardless of execution mode. @@ -836,9 +860,13 @@ def scan_docs_references( sequential mode. Files with security findings are excluded from link validation in both modes. - **O(N) reads:** each file is read exactly once in sequential mode. 
In - parallel mode external URL registration runs a lightweight sequential pass - in the main process after workers complete (workers discard scanners). + **Read behaviour:** total I/O remains :math:`O(N)` in the number of files, + but individual files may be read multiple times. In sequential mode the + scanner typically performs separate Shield and content passes, and some + rules may trigger an additional ``read_text()`` call. In parallel mode the + same per-worker behaviour applies; when ``validate_links=True`` an extra + lightweight sequential pass in the main process registers external URLs + after workers complete (workers discard scanners). Args: repo_root: Repository root (must contain ``docs/``). @@ -849,9 +877,8 @@ def scan_docs_references( workers: Number of worker processes for parallel mode. ``1`` (default) always uses sequential execution. ``None`` lets ``ProcessPoolExecutor`` pick based on - ``os.cpu_count()``. Any value other than ``1`` - activates parallel mode when the file count is at or - above :data:`ADAPTIVE_PARALLEL_THRESHOLD`. + ``os.cpu_count()``. Values must be ``None`` or + greater than or equal to ``1``. verbose: When ``True``, print a single telemetry line to stderr after the scan completes. Shows the engine mode, worker count, elapsed time, and estimated speedup (parallel @@ -867,6 +894,9 @@ def scan_docs_references( """ import time + if workers is not None and workers < 1: + raise ValueError("workers must be None or an integer >= 1") + if config is None: config, _ = ZenzicConfig.load(repo_root) @@ -912,7 +942,7 @@ def scan_docs_references( # Shield-as-firewall guarantee (no URLs from compromised files). 
secure_scanners_b: list[ReferenceScanner] = [] for md_file in md_files: - _report_b, secure_scanner_b = _scan_single_file(md_file, config, rule_engine) + _report_b, secure_scanner_b = _scan_single_file(md_file, config, None) if secure_scanner_b is not None: secure_scanners_b.append(secure_scanner_b) validator_b = LinkValidator() diff --git a/src/zenzic/models/config.py b/src/zenzic/models/config.py index d5b3796..27e46b0 100644 --- a/src/zenzic/models/config.py +++ b/src/zenzic/models/config.py @@ -206,6 +206,14 @@ class ZenzicConfig(BaseModel): "message='Remove before publish.' severity='warning'" ), ) + plugins: list[str] = Field( + default_factory=list, + description=( + "Explicit allow-list of external rule plugins to activate from the " + "'zenzic.rules' entry-point group. Core rules shipped by Zenzic are " + "always enabled." + ), + ) # Pre-compiled regex patterns for placeholder detection. # Populated automatically from placeholder_patterns in model_post_init. # Excluded from serialisation — never written to or read from TOML. diff --git a/tests/test_config.py b/tests/test_config.py index ace9559..fa34c90 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -76,6 +76,14 @@ def test_load_config_custom_rules(tmp_path: Path) -> None: assert loaded is True +def test_load_config_plugins_list(tmp_path: Path) -> None: + """plugins = [...] 
is parsed from zenzic.toml.""" + (tmp_path / "zenzic.toml").write_text("plugins = ['no-internal-hostname', 'acme-style']\n") + config, loaded = ZenzicConfig.load(tmp_path) + assert config.plugins == ["no-internal-hostname", "acme-style"] + assert loaded is True + + def test_placeholder_patterns_compiled_on_init(tmp_path: Path) -> None: """placeholder_patterns_compiled is populated automatically from placeholder_patterns.""" config = ZenzicConfig(placeholder_patterns=["todo", "wip"]) @@ -130,6 +138,16 @@ def test_load_config_pyproject_custom_rules(tmp_path: Path) -> None: assert loaded is True +def test_load_config_pyproject_plugins_list(tmp_path: Path) -> None: + """plugins list is parsed from [tool.zenzic] in pyproject.toml.""" + (tmp_path / "pyproject.toml").write_text( + "[tool.zenzic]\nplugins = ['no-internal-hostname', 'acme-style']\n" + ) + config, loaded = ZenzicConfig.load(tmp_path) + assert config.plugins == ["no-internal-hostname", "acme-style"] + assert loaded is True + + def test_load_config_zenzic_toml_wins_over_pyproject(tmp_path: Path) -> None: """zenzic.toml takes priority over [tool.zenzic] in pyproject.toml.""" (tmp_path / "zenzic.toml").write_text("fail_under = 90\n") diff --git a/tests/test_integration_finale.py b/tests/test_integration_finale.py index 3d036a4..e07edd3 100644 --- a/tests/test_integration_finale.py +++ b/tests/test_integration_finale.py @@ -15,7 +15,7 @@ import pytest -from zenzic.core.rules import PluginRuleInfo, list_plugin_rules +from zenzic.core.rules import BaseRule, PluginRuleInfo, RuleFinding, list_plugin_rules from zenzic.core.scanner import ADAPTIVE_PARALLEL_THRESHOLD, scan_docs_references from zenzic.models.config import ZenzicConfig @@ -31,6 +31,15 @@ def _make_docs(tmp_path: Path, n_files: int = 3) -> Path: return tmp_path +class _DummyRule(BaseRule): + @property + def rule_id(self) -> str: + return "DUMMY" + + def check(self, file_path: Path, text: str) -> list[RuleFinding]: + return [] + + # ─── list_plugin_rules 
──────────────────────────────────────────────────────── @@ -94,8 +103,26 @@ def test_list_plugin_rules_skips_unloadable_entry_point() -> None: with patch("importlib.metadata.entry_points", return_value=[bad_ep]): result = list_plugin_rules() - # Should return empty list without raising - assert result == [] + # Bad plugin is skipped; built-in core fallback is still present. + assert all(info.source != "bad" for info in result) + assert any(info.source == "broken-links" for info in result) + + +def test_list_plugin_rules_fallback_keeps_sorted_order() -> None: + """Core fallback insertion preserves sorted-by-source ordering.""" + from importlib.metadata import EntryPoint + + zzz_ep = EntryPoint( + name="zzz-rule", value="tests.test_integration_finale:_DummyRule", group="zenzic.rules" + ) + + with patch("importlib.metadata.entry_points", return_value=[zzz_ep]): + result = list_plugin_rules() + + sources = [r.source for r in result] + assert "broken-links" in sources + assert "zzz-rule" in sources + assert sources == sorted(sources) # ─── CLI: zenzic plugins list ──────────────────────────────────────────────── diff --git a/tests/test_parallel.py b/tests/test_parallel.py index a4edc87..3c9275f 100644 --- a/tests/test_parallel.py +++ b/tests/test_parallel.py @@ -89,6 +89,16 @@ def test_parallel_single_worker_is_sequential(tmp_path: Path) -> None: assert report.score == 100.0 +@pytest.mark.parametrize("workers", [0, -1, -8]) +def test_parallel_invalid_workers_raise_clear_error(tmp_path: Path, workers: int) -> None: + """workers must be None or >= 1 to avoid opaque executor errors.""" + repo = _make_docs(tmp_path, n_files=2) + config = ZenzicConfig() + + with pytest.raises(ValueError, match="workers must be None or an integer >= 1"): + scan_docs_references(repo, config, workers=workers) + + def test_parallel_sorted_output(tmp_path: Path) -> None: """Output is sorted by file_path regardless of worker scheduling order.""" repo = _make_docs(tmp_path, n_files=8) diff --git 
a/tests/test_rules.py b/tests/test_rules.py index 566d5c9..3f8263a 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -6,6 +6,7 @@ import time from pathlib import Path +from unittest.mock import patch import pytest @@ -36,6 +37,28 @@ def check(self, file_path: Path, text: str) -> list[RuleFinding]: raise RuntimeError("rule internal error") +class _PluginTodoRule(BaseRule): + @property + def rule_id(self) -> str: + return "PLUG-TODO" + + def check(self, file_path: Path, text: str) -> list[RuleFinding]: + findings: list[RuleFinding] = [] + for line_no, line in enumerate(text.splitlines(), start=1): + if "PLUGIN_TODO" in line: + findings.append( + RuleFinding( + file_path=file_path, + line_no=line_no, + rule_id=self.rule_id, + message="Plugin TODO marker found.", + severity="error", + matched_line=line, + ) + ) + return findings + + # ─── CustomRule ─────────────────────────────────────────────────────────────── @@ -196,6 +219,51 @@ def test_scan_docs_with_custom_rules_from_config(tmp_path: Path) -> None: assert reports[0].rule_findings[0].rule_id == "ZZ-DRAFT" +def test_build_rule_engine_none_without_custom_or_plugins() -> None: + """Without custom_rules/plugins, scanner avoids building a no-op engine.""" + from zenzic.core.scanner import _build_rule_engine + + config = ZenzicConfig() + assert _build_rule_engine(config) is None + + +def test_scan_docs_with_enabled_plugins_from_config(tmp_path: Path) -> None: + """plugins=[...] 
activates external plugin rules during scanning.""" + from zenzic.core.scanner import scan_docs_references + + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("# Page\n\nPLUGIN_TODO marker.\n") + + config = ZenzicConfig(plugins=["acme-todo"]) + + with ( + patch("zenzic.core.rules.PluginRegistry.load_core_rules", return_value=[]), + patch( + "zenzic.core.rules.PluginRegistry.load_selected_rules", + return_value=[_PluginTodoRule()], + ), + ): + reports, _ = scan_docs_references(tmp_path, config) + + assert len(reports) == 1 + assert len(reports[0].rule_findings) == 1 + assert reports[0].rule_findings[0].rule_id == "PLUG-TODO" + + +def test_scan_docs_with_unknown_plugin_raises_contract_error(tmp_path: Path) -> None: + """Unknown plugin IDs in config.plugins fail fast with a clear error.""" + from zenzic.core.scanner import scan_docs_references + + docs = tmp_path / "docs" + docs.mkdir() + (docs / "page.md").write_text("# Page\n") + + config = ZenzicConfig(plugins=["does-not-exist"]) + with pytest.raises(PluginContractError, match="Configured plugin rule IDs were not found"): + scan_docs_references(tmp_path, config) + + # ─── Cross-adapter custom rules (Dev 4 mandate) ───────────────────────────────