diff --git a/.github/workflows/test-parallel.yml b/.github/workflows/test-parallel.yml
index 72825e7..6c75bf1 100644
--- a/.github/workflows/test-parallel.yml
+++ b/.github/workflows/test-parallel.yml
@@ -40,7 +40,7 @@ jobs:
 
             plugin=$(echo $test_file | sed 's|abx_plugins/plugins/\([^/]*\)/.*|\1|')
             test_name=$(basename $test_file .py | sed 's/^test_//')
-            name="plugin/$plugin/$test_name"
+            name="$test_name"
 
             json_array+="{\"path\":\"$test_file\",\"name\":\"$name\"}"
           done
@@ -93,13 +93,20 @@ jobs:
 
       - uses: awalsh128/cache-apt-pkgs-action@latest
         with:
-          packages: git ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
+          packages: git wget ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
           version: 1.1
 
       - name: Install dependencies with uv
         run: |
+          uv venv
+          uv sync --dev --all-extras
           uv pip install -e ".[dev]"
 
       - name: Run test - ${{ matrix.test.name }}
         run: |
-          uv run pytest -xvs "${{ matrix.test.path }}" --basetemp=tests/out
+          uv run pytest -xvs "${{ matrix.test.path }}" --basetemp="$RUNNER_TEMP/pytest-out"
+        env:
+          TWOCAPTCHA_API_KEY: ${{ secrets.TWOCAPTCHA_API_KEY }}
+          CHROME_ARGS_EXTRA: '["--no-sandbox"]'
+          CHROME_HEADLESS: "True"
+          CHROME_BINARY: "/usr/bin/chromium"
diff --git a/README.md b/README.md
index 8e82c3a..5c0d94c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # abx-plugins
 
-ArchiveBox-compatible plugin suite (hooks, config schemas, binaries manifests).
+ArchiveBox-compatible plugin suite (hooks and config schemas).
 
 This package contains only plugin assets and a tiny helper to locate them.
 It does **not** depend on Django or ArchiveBox.
@@ -11,7 +11,7 @@ It does **not** depend on Django or ArchiveBox.
 from abx_plugins import get_plugins_dir
 
 plugins_dir = get_plugins_dir()
-# scan plugins_dir for plugins/*/config.json, binaries.jsonl, on_* hooks
+# scan plugins_dir for plugins/*/config.json and on_* hooks
 ```
 
 Tools like `abx-dl` and ArchiveBox can discover plugins from this package
@@ -23,8 +23,9 @@ without symlinks or environment-variable tricks.
 
 Each plugin lives under `plugins/<name>/` and may include:
 
-- `config.json` (optional) - config schema
-- `on_*` hook scripts (required to do work)
+- `config.json` config schema
+- `on_Crawl__...` per-crawl hook scripts (optional) - install dependencies / set up shared resources
+- `on_Snapshot__...` per-snapshot hooks - for each URL: do xyz...
 
 Hooks run with:
 
@@ -42,6 +43,78 @@ Hooks run with:
 - `PERSONAS_DIR` - persona profiles root (default: `~/.config/abx/personas`)
 - `ACTIVE_PERSONA` - persona name (default: `Default`)
 
+### Install hook contract (concise)
+
+Lifecycle:
+
+1. `on_Crawl__*install*` declares crawl dependencies.
+2. `on_Binary__*install*` resolves/installs one binary with one provider.
+
+`on_Crawl` output (dependency declaration):
+
+```json
+{"type":"Binary","name":"yt-dlp","binproviders":"pip,brew,apt,env","overrides":{"pip":{"packages":["yt-dlp[default]"]}},"machine_id":"<optional>"}
+```
+
+`on_Binary` input/output:
+
+- CLI input should accept `--binary-id`, `--machine-id`, `--name` (plus optional provider args).
+- Output should emit installed facts like:
+
+```json
+{"type":"Binary","name":"yt-dlp","abspath":"/abs/path","version":"2025.01.01","sha256":"<optional>","binprovider":"pip","machine_id":"<recommended>","binary_id":"<recommended>"}
+```
+
+Optional machine patch record:
+
+```json
+{"type":"Machine","config":{"PATH":"...","NODE_MODULES_DIR":"...","CHROME_BINARY":"..."}}
+```
+
+Semantics:
+
+- `stdout`: JSONL records only
+- `stderr`: human logs/debug
+- exit `0`: success or intentional skip
+- exit non-zero: hard failure
+
+State/OS:
+
+- working dir: `CRAWL_DIR/<plugin>/`
+- durable install root: `LIB_DIR` (e.g. npm prefix, pip venv, puppeteer cache)
+- providers: `apt` (Debian/Ubuntu), `brew` (macOS/Linux), many hooks currently assume POSIX paths
+
+### Snapshot hook contract (concise)
+
+Lifecycle:
+
+- runs once per snapshot, typically after crawl setup
+- common Chrome flow: crawl browser/session -> `chrome_tab` -> `chrome_navigate` -> downstream extractors
+
+State:
+
+- output cwd is usually `SNAP_DIR/<plugin>/`
+- hooks may read sibling outputs via `../<plugin>/...`
+
+Output records:
+
+- terminal record is usually:
+
+```json
+{"type":"ArchiveResult","status":"succeeded|skipped|failed","output_str":"path-or-message"}
+```
+
+- discovery hooks may also emit `Snapshot` and `Tag` records before `ArchiveResult`
+- search indexing hooks are a known exception and may use exit code + stderr without `ArchiveResult`
+
+Semantics:
+
+- `stdout`: JSONL records
+- `stderr`: diagnostics/logging
+- exit `0`: succeeded or skipped
+- exit non-zero: failed
+- current nuance: some skip/transient paths emit no JSONL and rely only on exit code
+
 ### Event JSONL interface (bbus-style, no dependency)
 
 Hooks emit JSONL events to stdout. They do **not** need to import `bbus`.
diff --git a/abx_plugins/__init__.py b/abx_plugins/__init__.py
index 6619567..2a69c75 100644
--- a/abx_plugins/__init__.py
+++ b/abx_plugins/__init__.py
@@ -3,12 +3,11 @@
 from __future__ import annotations
 
 from pathlib import Path
-from importlib import resources
 
 
 def get_plugins_dir() -> Path:
     """Return the filesystem path to the bundled plugins directory."""
-    return Path(resources.files(__name__) / "plugins")
+    return Path(__file__).resolve().parent / "plugins"
 
 
 __all__ = ["get_plugins_dir"]
diff --git a/abx_plugins/plugins/accessibility/on_Snapshot__39_accessibility.js b/abx_plugins/plugins/accessibility/on_Snapshot__39_accessibility.js
index f879283..14c60f4 100755
--- a/abx_plugins/plugins/accessibility/on_Snapshot__39_accessibility.js
+++ b/abx_plugins/plugins/accessibility/on_Snapshot__39_accessibility.js
@@ -20,6 +20,14 @@ const path = require('path');
 // Add NODE_MODULES_DIR to module resolution paths if set
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
+const {
+    getEnvBool,
+    getEnvInt,
+    parseArgs,
+    readCdpUrl,
+    connectToPage,
+    waitForPageLoaded,
+} = require('../chrome/chrome_utils.js');
 
 // Extractor metadata
 const PLUGIN_NAME = 'accessibility';
@@ -32,100 +40,27 @@ if (!fs.existsSync(OUTPUT_DIR)) {
 process.chdir(OUTPUT_DIR);
 const OUTPUT_FILE = 'accessibility.json';
 const CHROME_SESSION_DIR = '../chrome';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-// Get environment variable with default
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-// Get CDP URL from chrome plugin
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-function assertChromeSession() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    const pidFile = path.join(CHROME_SESSION_DIR, 'chrome.pid');
-    if (!fs.existsSync(cdpFile) || !fs.existsSync(targetIdFile) || !fs.existsSync(pidFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    try {
-        const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
-        if (!pid || Number.isNaN(pid)) throw new Error('Invalid pid');
-        process.kill(pid, 0);
-    } catch (e) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    return cdpUrl;
-}
 
 // Extract accessibility info
-async function extractAccessibility(url) {
+async function extractAccessibility(url, timeoutMs) {
     // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;
 
     try {
-        // Connect to existing Chrome session
-        const cdpUrl = assertChromeSession();
+        if (!readCdpUrl(CHROME_SESSION_DIR)) {
+            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
+        }
 
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
+        const connection = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs,
+            puppeteer,
         });
-
-        // Get the page
-        const pages = await browser.pages();
-        const page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            return { success: false, error: 'No page found in Chrome session' };
-        }
+        browser = connection.browser;
+        const page = connection.page;
+        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
 
         // Get accessibility snapshot
         const accessibilityTree = await page.accessibility.snapshot({ interestingOnly: true });
@@ -250,14 +185,8 @@ async function main() {
             process.exit(0);
         }
 
-        // Check if Chrome session exists, then wait for page load
-        assertChromeSession();
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
-
-        const result = await extractAccessibility(url);
+        const timeoutMs = getEnvInt('ACCESSIBILITY_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
+        const result = await extractAccessibility(url, timeoutMs);
 
         if (result.success) {
             status = 'succeeded';
diff --git a/abx_plugins/plugins/accessibility/tests/test_accessibility.py b/abx_plugins/plugins/accessibility/tests/test_accessibility.py
index b1a1e24..f03fb32 100644
--- a/abx_plugins/plugins/accessibility/tests/test_accessibility.py
+++ b/abx_plugins/plugins/accessibility/tests/test_accessibility.py
@@ -13,18 +13,19 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     get_test_env,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
 )
 
 
 def chrome_available() -> bool:
     """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
+    for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
         if shutil.which(name):
             return True
     return False
@@ -32,7 +33,7 @@ def chrome_available() -> bool:
 
 # Get the path to the accessibility hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-ACCESSIBILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_accessibility.*')
+ACCESSIBILITY_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_accessibility.*")
 
 
 class TestAccessibilityPlugin:
@@ -40,7 +41,9 @@ class TestAccessibilityPlugin:
 
     def test_accessibility_hook_exists(self):
         """Accessibility hook script should exist."""
-        assert ACCESSIBILITY_HOOK is not None, "Accessibility hook not found in plugin directory"
+        assert ACCESSIBILITY_HOOK is not None, (
+            "Accessibility hook not found in plugin directory"
+        )
         assert ACCESSIBILITY_HOOK.exists(), f"Hook not found: {ACCESSIBILITY_HOOK}"
 
 
@@ -50,7 +53,7 @@ class TestAccessibilityWithChrome:
     def setup_method(self, _method=None):
         """Set up test environment."""
         self.temp_dir = Path(tempfile.mkdtemp())
-        self.snap_dir = self.temp_dir / 'snap'
+        self.snap_dir = self.temp_dir / "snap"
         self.snap_dir.mkdir(parents=True, exist_ok=True)
 
     def teardown_method(self, _method=None):
@@ -60,12 +63,12 @@ def teardown_method(self, _method=None):
     def test_accessibility_extracts_page_outline(self, chrome_test_url):
         """Accessibility hook should extract headings and accessibility tree."""
         test_url = chrome_test_url
-        snapshot_id = 'test-accessibility-snapshot'
+        snapshot_id = "test-accessibility-snapshot"
 
         try:
             with chrome_session(
                 self.temp_dir,
-                crawl_id='test-accessibility-crawl',
+                crawl_id="test-accessibility-crawl",
                 snapshot_id=snapshot_id,
                 test_url=test_url,
                 navigate=True,
@@ -75,16 +78,23 @@ def test_accessibility_extracts_page_outline(self, chrome_test_url):
 
                 # Run accessibility hook with the active Chrome session
                 result = subprocess.run(
-                    ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(ACCESSIBILITY_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(snapshot_chrome_dir),
                     capture_output=True,
                     text=True,
                     timeout=60,
-                    env=env
+                    env=env,
                 )
 
                 # Check for output file
-                accessibility_output = Path(env['SNAP_DIR']) / 'accessibility' / 'accessibility.json'
+                accessibility_output = (
+                    Path(env["SNAP_DIR"]) / "accessibility" / "accessibility.json"
+                )
 
                 accessibility_data = None
 
@@ -98,14 +108,18 @@ def test_accessibility_extracts_page_outline(self, chrome_test_url):
 
                 # Verify hook ran successfully
                 assert result.returncode == 0, f"Hook failed: {result.stderr}"
-                assert 'Traceback' not in result.stderr
+                assert "Traceback" not in result.stderr
 
                 # example.com has headings, so we should get accessibility data
-                assert accessibility_data is not None, "No accessibility data was generated"
+                assert accessibility_data is not None, (
+                    "No accessibility data was generated"
+                )
 
                 # Verify we got page outline data
-                assert 'headings' in accessibility_data, f"Missing headings: {accessibility_data}"
-                assert 'url' in accessibility_data, f"Missing url: {accessibility_data}"
+                assert "headings" in accessibility_data, (
+                    f"Missing headings: {accessibility_data}"
+                )
+                assert "url" in accessibility_data, f"Missing url: {accessibility_data}"
 
         except RuntimeError:
             raise
@@ -113,38 +127,43 @@ def test_accessibility_extracts_page_outline(self, chrome_test_url):
     def test_accessibility_disabled_skips(self, chrome_test_url):
         """Test that ACCESSIBILITY_ENABLED=False skips without error."""
         test_url = chrome_test_url
-        snapshot_id = 'test-disabled'
+        snapshot_id = "test-disabled"
 
-        env = get_test_env() | {'SNAP_DIR': str(self.snap_dir)}
-        env['ACCESSIBILITY_ENABLED'] = 'False'
+        env = get_test_env() | {"SNAP_DIR": str(self.snap_dir)}
+        env["ACCESSIBILITY_ENABLED"] = "False"
 
         result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+            [
+                "node",
+                str(ACCESSIBILITY_HOOK),
+                f"--url={test_url}",
+                f"--snapshot-id={snapshot_id}",
+            ],
             cwd=str(self.temp_dir),
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should exit 0 even when disabled
         assert result.returncode == 0, f"Should succeed when disabled: {result.stderr}"
 
         # Should NOT create output file when disabled
-        accessibility_output = self.snap_dir / 'accessibility' / 'accessibility.json'
+        accessibility_output = self.snap_dir / "accessibility" / "accessibility.json"
         assert not accessibility_output.exists(), "Should not create file when disabled"
 
     def test_accessibility_missing_url_argument(self):
         """Test that missing --url argument causes error."""
-        snapshot_id = 'test-missing-url'
+        snapshot_id = "test-missing-url"
 
         result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--snapshot-id={snapshot_id}'],
+            ["node", str(ACCESSIBILITY_HOOK), f"--snapshot-id={snapshot_id}"],
             cwd=str(self.temp_dir),
             capture_output=True,
             text=True,
             timeout=30,
-            env=get_test_env() | {'SNAP_DIR': str(self.snap_dir)}
+            env=get_test_env() | {"SNAP_DIR": str(self.snap_dir)},
         )
 
         # Should fail with non-zero exit code
@@ -155,12 +174,12 @@ def test_accessibility_missing_snapshot_id_argument(self, chrome_test_url):
         test_url = chrome_test_url
 
         result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}'],
+            ["node", str(ACCESSIBILITY_HOOK), f"--url={test_url}"],
             cwd=str(self.temp_dir),
             capture_output=True,
             text=True,
             timeout=30,
-            env=get_test_env() | {'SNAP_DIR': str(self.snap_dir)}
+            env=get_test_env() | {"SNAP_DIR": str(self.snap_dir)},
         )
 
         # Should fail with non-zero exit code
@@ -169,15 +188,20 @@ def test_accessibility_missing_snapshot_id_argument(self, chrome_test_url):
     def test_accessibility_with_no_chrome_session(self, chrome_test_url):
         """Test that hook fails gracefully when no Chrome session exists."""
         test_url = chrome_test_url
-        snapshot_id = 'test-no-chrome'
+        snapshot_id = "test-no-chrome"
 
         result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+            [
+                "node",
+                str(ACCESSIBILITY_HOOK),
+                f"--url={test_url}",
+                f"--snapshot-id={snapshot_id}",
+            ],
             cwd=str(self.temp_dir),
             capture_output=True,
             text=True,
             timeout=30,
-            env=get_test_env()
+            env=get_test_env(),
         )
 
         # Should fail when no Chrome session
@@ -185,9 +209,9 @@ def test_accessibility_with_no_chrome_session(self, chrome_test_url):
         # Error should mention CDP or Chrome
         err_lower = result.stderr.lower()
         assert any(
-            x in err_lower for x in ['chrome', 'cdp', 'cannot find', 'puppeteer']
+            x in err_lower for x in ["chrome", "cdp", "cannot find", "puppeteer"]
         ), f"Should mention Chrome/CDP in error: {result.stderr}"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/apt/on_Binary__13_apt_install.py b/abx_plugins/plugins/apt/on_Binary__13_apt_install.py
index 03767c5..38392cf 100755
--- a/abx_plugins/plugins/apt/on_Binary__13_apt_install.py
+++ b/abx_plugins/plugins/apt/on_Binary__13_apt_install.py
@@ -16,23 +16,22 @@
 import sys
 
 import rich_click as click
-from abx_pkg import Binary, AptProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-AptProvider.model_rebuild()
+from abx_pkg import AptProvider, Binary, EnvProvider
 
 
 @click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
+@click.option("--binary-id", required=True, help="Binary UUID")
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict")
+def main(
+    binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None
+):
     """Install binary using apt package manager."""
 
     # Check if apt provider is allowed
-    if binproviders != '*' and 'apt' not in binproviders.split(','):
+    if binproviders != "*" and "apt" not in binproviders.split(","):
         click.echo(f"apt provider not allowed for {name}", err=True)
         sys.exit(0)  # Not an error, just skip
 
@@ -42,7 +41,7 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
         click.echo("apt not available on this system", err=True)
         sys.exit(1)
 
-    click.echo(f"Installing {name} via apt...", err=True)
+    click.echo(f"Resolving {name} via apt (load or install)...", err=True)
 
     try:
         # Parse overrides if provided
@@ -51,12 +50,19 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
             try:
                 overrides_dict = json.loads(overrides)
                 # Extract apt-specific overrides
-                overrides_dict = overrides_dict.get('apt', {})
+                overrides_dict = overrides_dict.get("apt", {})
                 click.echo(f"Using apt install overrides: {overrides_dict}", err=True)
             except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides={'apt': overrides_dict} if overrides_dict else {}).install()
+                click.echo(
+                    f"Warning: Failed to parse overrides JSON: {overrides}", err=True
+                )
+
+        # Prefer already-installed binaries found in PATH, then fall back to apt install.
+        binary = Binary(
+            name=name,
+            binproviders=[EnvProvider(), provider],
+            overrides={"apt": overrides_dict} if overrides_dict else {},
+        ).load_or_install()
     except Exception as e:
         click.echo(f"apt install failed: {e}", err=True)
         sys.exit(1)
@@ -65,16 +71,22 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
         click.echo(f"{name} not found after apt install", err=True)
         sys.exit(1)
 
+    resolved_provider = getattr(binary, "binprovider", None)
+    if isinstance(resolved_provider, str):
+        resolved_provider_name = resolved_provider
+    else:
+        resolved_provider_name = getattr(resolved_provider, "name", "") or ""
+
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'apt',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": resolved_provider_name,
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
@@ -85,5 +97,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/apt/tests/test_apt_provider.py b/abx_plugins/plugins/apt/tests/test_apt_provider.py
index 417a72a..f7d46fe 100644
--- a/abx_plugins/plugins/apt/tests/test_apt_provider.py
+++ b/abx_plugins/plugins/apt/tests/test_apt_provider.py
@@ -8,7 +8,6 @@
 """
 
 import json
-import os
 import shutil
 import subprocess
 import sys
@@ -20,18 +19,19 @@
 
 # Get the path to the apt provider hook
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_apt_install.py'), None)
+INSTALL_HOOK = next(PLUGIN_DIR.glob("on_Binary__*_apt_install.py"), None)
 
 
 def apt_available() -> bool:
     """Check if apt is installed."""
-    return shutil.which('apt') is not None or shutil.which('apt-get') is not None
+    return shutil.which("apt") is not None or shutil.which("apt-get") is not None
 
 
 def is_linux() -> bool:
     """Check if running on Linux."""
     import platform
-    return platform.system().lower() == 'linux'
+
+    return platform.system().lower() == "linux"
 
 
 class TestAptProviderHook:
@@ -53,19 +53,20 @@ def test_hook_skips_when_apt_not_allowed(self):
         """Hook should skip when apt not in allowed binproviders."""
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=wget',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,npm',  # apt not allowed
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=wget",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                "--binproviders=pip,npm",  # apt not allowed
             ],
             capture_output=True,
             text=True,
-            timeout=30
+            timeout=30,
         )
 
         # Should exit cleanly (code 0) when apt not allowed
-        assert 'apt provider not allowed' in result.stderr
+        assert "apt provider not allowed" in result.stderr
         assert result.returncode == 0
 
     @pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
@@ -74,40 +75,40 @@ def test_hook_detects_apt(self):
         assert apt_available(), "apt not installed"
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent-pkg-xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=nonexistent-pkg-xyz123",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
-            timeout=30
+            timeout=30,
         )
 
         # Should not say apt is not available
-        assert 'apt not available' not in result.stderr
+        assert "apt not available" not in result.stderr
 
     def test_hook_handles_overrides(self):
         """Hook should accept overrides JSON."""
-        overrides = json.dumps({
-            'apt': {'packages': ['custom-package-name']}
-        })
+        overrides = json.dumps({"apt": {"packages": ["custom-package-name"]}})
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=test-pkg',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                f'--overrides={overrides}',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=test-pkg",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                f"--overrides={overrides}",
             ],
             capture_output=True,
             text=True,
-            timeout=30
+            timeout=30,
         )
 
         # Should not crash parsing overrides
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
 
 
 @pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
@@ -120,34 +121,35 @@ def test_detect_existing_binary(self):
         # Check for a binary that's almost certainly installed (like 'ls' or 'bash')
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=bash',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=bash",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
-            timeout=60
+            timeout=60,
         )
 
         # Parse JSONL output
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'bash':
+                    if record.get("type") == "Binary" and record.get("name") == "bash":
                         # Found bash
-                        assert record.get('abspath')
-                        assert Path(record['abspath']).exists()
+                        assert record.get("abspath")
+                        assert Path(record["abspath"]).exists()
                         return
                 except json.JSONDecodeError:
                     continue
 
         # apt may not be able to "install" bash (already installed)
         # Just verify no crash
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py b/abx_plugins/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py
index a981e3f..d69ed63 100755
--- a/abx_plugins/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py
+++ b/abx_plugins/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py
@@ -15,22 +15,24 @@
 import json
 import os
 import sys
+from importlib import import_module
 from pathlib import Path
+from typing import Any
 
 import rich_click as click
 
 
 # Extractor metadata
-PLUGIN_NAME = 'archivedotorg'
+PLUGIN_NAME = "archivedotorg"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-OUTPUT_FILE = 'archive.org.txt'
+OUTPUT_FILE = "archive.org.txt"
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
@@ -47,81 +49,85 @@ def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]:
 
     Returns: (success, output_path, error_message)
     """
+
     def log(message: str) -> None:
-        print(f'[archivedotorg] {message}', file=sys.stderr)
+        print(f"[archivedotorg] {message}", file=sys.stderr)
 
     try:
-        import requests
-    except ImportError:
-        return False, None, 'requests library not installed'
+        requests: Any = import_module("requests")
+    except ModuleNotFoundError:
+        return False, None, "requests library not installed"
 
-    timeout = get_env_int('ARCHIVEDOTORG_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+    timeout = get_env_int("ARCHIVEDOTORG_TIMEOUT") or get_env_int("TIMEOUT", 60)
+    user_agent = get_env("USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)")
 
-    submit_url = f'https://web.archive.org/save/{url}'
-    log(f'Submitting to Wayback Machine (timeout={timeout}s)')
-    log(f'GET {submit_url}')
+    submit_url = f"https://web.archive.org/save/{url}"
+    log(f"Submitting to Wayback Machine (timeout={timeout}s)")
+    log(f"GET {submit_url}")
 
     try:
         response = requests.get(
             submit_url,
             timeout=timeout,
-            headers={'User-Agent': user_agent},
+            headers={"User-Agent": user_agent},
             allow_redirects=True,
         )
-        log(f'HTTP {response.status_code} final_url={response.url}')
+        log(f"HTTP {response.status_code} final_url={response.url}")
 
         # Check for successful archive
-        content_location = response.headers.get('Content-Location', '')
-        x_archive_orig_url = response.headers.get('X-Archive-Orig-Url', '')
+        content_location = response.headers.get("Content-Location", "")
+        x_archive_orig_url = response.headers.get("X-Archive-Orig-Url", "")
         if content_location:
-            log(f'Content-Location: {content_location}')
+            log(f"Content-Location: {content_location}")
         if x_archive_orig_url:
-            log(f'X-Archive-Orig-Url: {x_archive_orig_url}')
+            log(f"X-Archive-Orig-Url: {x_archive_orig_url}")
 
         # Build archive URL
         if content_location:
-            archive_url = f'https://web.archive.org{content_location}'
-            Path(OUTPUT_FILE).write_text(archive_url, encoding='utf-8')
-            log(f'Saved archive URL -> {archive_url}')
-            return True, OUTPUT_FILE, ''
-        elif 'web.archive.org' in response.url:
+            archive_url = f"https://web.archive.org{content_location}"
+            Path(OUTPUT_FILE).write_text(archive_url, encoding="utf-8")
+            log(f"Saved archive URL -> {archive_url}")
+            return True, OUTPUT_FILE, ""
+        elif "web.archive.org" in response.url:
             # We were redirected to an archive page
-            Path(OUTPUT_FILE).write_text(response.url, encoding='utf-8')
-            log(f'Redirected to archive page -> {response.url}')
-            return True, OUTPUT_FILE, ''
+            Path(OUTPUT_FILE).write_text(response.url, encoding="utf-8")
+            log(f"Redirected to archive page -> {response.url}")
+            return True, OUTPUT_FILE, ""
         else:
             # Check for errors in response
-            if 'RobotAccessControlException' in response.text:
+            if "RobotAccessControlException" in response.text:
                 # Blocked by robots.txt - save submit URL for manual retry
-                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
-                log('Blocked by robots.txt, saved submit URL for manual retry')
-                return True, OUTPUT_FILE, ''  # Consider this a soft success
+                Path(OUTPUT_FILE).write_text(submit_url, encoding="utf-8")
+                log("Blocked by robots.txt, saved submit URL for manual retry")
+                return True, OUTPUT_FILE, ""  # Consider this a soft success
             elif response.status_code >= 400:
-                return False, None, f'HTTP {response.status_code}'
+                return False, None, f"HTTP {response.status_code}"
             else:
                 # Save submit URL anyway
-                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
-                log('No archive URL returned, saved submit URL for manual retry')
-                return True, OUTPUT_FILE, ''
+                Path(OUTPUT_FILE).write_text(submit_url, encoding="utf-8")
+                log("No archive URL returned, saved submit URL for manual retry")
+                return True, OUTPUT_FILE, ""
 
     except requests.Timeout:
-        return False, None, f'Request timed out after {timeout} seconds'
+        return False, None, f"Request timed out after {timeout} seconds"
     except requests.RequestException as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to submit to archive.org')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to submit to archive.org")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Submit a URL to archive.org for archiving."""
 
     # Check if feature is enabled
-    if get_env('ARCHIVEDOTORG_ENABLED', 'True').lower() in ('false', '0', 'no', 'off'):
-        print('Skipping archive.org submission (ARCHIVEDOTORG_ENABLED=False)', file=sys.stderr)
+    if get_env("ARCHIVEDOTORG_ENABLED", "True").lower() in ("false", "0", "no", "off"):
+        print(
+            "Skipping archive.org submission (ARCHIVEDOTORG_ENABLED=False)",
+            file=sys.stderr,
+        )
         # Temporary failure (config disabled) - NO JSONL emission
         sys.exit(0)
 
@@ -132,23 +138,23 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult with output file
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or '',
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error (network, timeout, HTTP error) - emit NO JSONL
             # System will retry later
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Unexpected error - also transient, emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/archivedotorg/tests/test_archivedotorg.py b/abx_plugins/plugins/archivedotorg/tests/test_archivedotorg.py
index 1e4b4a9..3773e6f 100644
--- a/abx_plugins/plugins/archivedotorg/tests/test_archivedotorg.py
+++ b/abx_plugins/plugins/archivedotorg/tests/test_archivedotorg.py
@@ -12,29 +12,52 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archivedotorg.*'), None)
-TEST_URL = 'https://example.com'
+_ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_archivedotorg.*"), None)
+if _ARCHIVEDOTORG_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+ARCHIVEDOTORG_HOOK = _ARCHIVEDOTORG_HOOK
+TEST_URL = "https://example.com"
+
 
 def test_hook_script_exists():
     assert ARCHIVEDOTORG_HOOK.exists()
 
+
 def test_submits_to_archivedotorg():
     with tempfile.TemporaryDirectory() as tmpdir:
+        import os
+
+        env = os.environ.copy()
+        # Keep the hook's own network timeout below subprocess timeout so failures
+        # return cleanly as exit=1 instead of being killed by pytest.
+        env["ARCHIVEDOTORG_TIMEOUT"] = "45"
+
         result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=60
+            [
+                sys.executable,
+                str(ARCHIVEDOTORG_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test789",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=90,
         )
 
         assert result.returncode in (0, 1)
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
@@ -43,41 +66,79 @@ def test_submits_to_archivedotorg():
         if result.returncode == 0:
             # Success - should have ArchiveResult
             assert result_json, "Should have ArchiveResult JSONL output on success"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+            assert result_json["status"] == "succeeded", (
+                f"Should succeed: {result_json}"
+            )
         else:
             # Transient error - no JSONL output, just stderr
             assert not result_json, "Should NOT emit JSONL on transient error"
             assert result.stderr, "Should have error message in stderr"
 
+
 def test_config_save_archivedotorg_false_skips():
     with tempfile.TemporaryDirectory() as tmpdir:
         import os
+
         env = os.environ.copy()
-        env['ARCHIVEDOTORG_ENABLED'] = 'False'
+        env["ARCHIVEDOTORG_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
+            [
+                sys.executable,
+                str(ARCHIVEDOTORG_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
+
 
 def test_handles_timeout():
     with tempfile.TemporaryDirectory() as tmpdir:
         import os
+
         env = os.environ.copy()
-        env['TIMEOUT'] = '1'
+        env["TIMEOUT"] = "1"
 
         result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
+            [
+                sys.executable,
+                str(ARCHIVEDOTORG_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testtimeout",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30,
         )
 
         # Timeout is a transient error - should exit 1 with no JSONL
@@ -85,9 +146,15 @@ def test_handles_timeout():
 
         # If it timed out (exit 1), should have no JSONL output
         if result.returncode == 1:
-            jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                          if line.strip().startswith('{')]
-            assert len(jsonl_lines) == 0, "Should not emit JSONL on timeout (transient error)"
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+            jsonl_lines = [
+                line
+                for line in result.stdout.strip().split("\n")
+                if line.strip().startswith("{")
+            ]
+            assert len(jsonl_lines) == 0, (
+                "Should not emit JSONL on timeout (transient error)"
+            )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/brew/on_Binary__12_brew_install.py b/abx_plugins/plugins/brew/on_Binary__12_brew_install.py
index 9ac19f6..6781f33 100755
--- a/abx_plugins/plugins/brew/on_Binary__12_brew_install.py
+++ b/abx_plugins/plugins/brew/on_Binary__12_brew_install.py
@@ -14,27 +14,30 @@
 #
 
 import json
-import os
 import sys
 
 import rich_click as click
-from abx_pkg import Binary, BrewProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-BrewProvider.model_rebuild()
+from abx_pkg import Binary, BrewProvider, EnvProvider
 
 
 @click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', default=None, help="Custom install command")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str | None, overrides: str | None):
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--binary-id", required=True, help="Dependency UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--custom-cmd", default=None, help="Custom install command")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict")
+def main(
+    binary_id: str,
+    machine_id: str,
+    name: str,
+    binproviders: str,
+    custom_cmd: str | None,
+    overrides: str | None,
+):
     """Install binary using Homebrew."""
 
-    if binproviders != '*' and 'brew' not in binproviders.split(','):
+    if binproviders != "*" and "brew" not in binproviders.split(","):
         click.echo(f"brew provider not allowed for {name}", err=True)
         sys.exit(0)
 
@@ -44,7 +47,7 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         click.echo("brew not available on this system", err=True)
         sys.exit(1)
 
-    click.echo(f"Installing {name} via brew...", err=True)
+    click.echo(f"Resolving {name} via brew (load or install)...", err=True)
 
     try:
         # Parse overrides if provided
@@ -52,11 +55,20 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         if overrides:
             try:
                 overrides_dict = json.loads(overrides)
-                click.echo(f"Using custom install overrides: {overrides_dict}", err=True)
+                click.echo(
+                    f"Using custom install overrides: {overrides_dict}", err=True
+                )
             except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides=overrides_dict or {}).install()
+                click.echo(
+                    f"Warning: Failed to parse overrides JSON: {overrides}", err=True
+                )
+
+        # Prefer already-installed binaries found in PATH, then fall back to brew install.
+        binary = Binary(
+            name=name,
+            binproviders=[EnvProvider(), provider],
+            overrides=overrides_dict or {},
+        ).load_or_install()
     except Exception as e:
         click.echo(f"brew install failed: {e}", err=True)
         sys.exit(1)
@@ -65,18 +77,22 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         click.echo(f"{name} not found after brew install", err=True)
         sys.exit(1)
 
-    machine_id = os.environ.get('MACHINE_ID', '')
+    resolved_provider = getattr(binary, "binprovider", None)
+    if isinstance(resolved_provider, str):
+        resolved_provider_name = resolved_provider
+    else:
+        resolved_provider_name = getattr(resolved_provider, "name", "") or ""
 
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'brew',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": resolved_provider_name,
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
@@ -87,5 +103,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/chrome/chrome_utils.js b/abx_plugins/plugins/chrome/chrome_utils.js
index b14eb56..c1b75c0 100755
--- a/abx_plugins/plugins/chrome/chrome_utils.js
+++ b/abx_plugins/plugins/chrome/chrome_utils.js
@@ -201,35 +201,77 @@ function findFreePort() {
  */
 function waitForDebugPort(port, timeout = 30000) {
     const startTime = Date.now();
+    let lastFailure = 'no response yet';
+    const host = '127.0.0.1';
 
-    return new Promise((resolve, reject) => {
-        const tryConnect = () => {
-            if (Date.now() - startTime > timeout) {
-                reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
-                return;
-            }
+    const normalizeWsUrl = (rawWsUrl) => {
+        try {
+            const parsed = new URL(rawWsUrl);
+            if (!parsed.port) parsed.port = String(port);
+            return parsed.toString();
+        } catch (e) {
+            return rawWsUrl;
+        }
+    };
 
-            const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
+    const probeDebugPort = () => new Promise((resolve, reject) => {
+        const req = http.request(
+            {
+                host,
+                port,
+                path: '/json/version',
+                method: 'GET',
+                headers: {
+                    Host: `${host}:${port}`,
+                    Connection: 'close',
+                },
+                timeout: 5000,
+            },
+            (res) => {
                 let data = '';
                 res.on('data', (chunk) => (data += chunk));
                 res.on('end', () => {
+                    if ((res.statusCode || 0) >= 400) {
+                        reject(new Error(`HTTP ${res.statusCode}`));
+                        return;
+                    }
                     try {
                         const info = JSON.parse(data);
+                        if (!info?.webSocketDebuggerUrl) {
+                            reject(new Error('missing webSocketDebuggerUrl in /json/version response'));
+                            return;
+                        }
+                        info.webSocketDebuggerUrl = normalizeWsUrl(info.webSocketDebuggerUrl);
                         resolve(info);
-                    } catch (e) {
-                        setTimeout(tryConnect, 100);
+                    } catch (error) {
+                        reject(new Error(`invalid /json/version payload: ${error.message}`));
                     }
                 });
-            });
+            }
+        );
+        req.on('error', reject);
+        req.on('timeout', () => {
+            req.destroy(new Error('request timeout'));
+        });
+        req.end();
+    });
 
-            req.on('error', () => {
-                setTimeout(tryConnect, 100);
-            });
+    return new Promise((resolve, reject) => {
+        const tryConnect = async () => {
+            if (Date.now() - startTime > timeout) {
+                reject(new Error(`Timeout waiting for Chrome debug port ${port} (${lastFailure})`));
+                return;
+            }
 
-            req.setTimeout(1000, () => {
-                req.destroy();
-                setTimeout(tryConnect, 100);
-            });
+            try {
+                const info = await probeDebugPort();
+                resolve(info);
+                return;
+            } catch (error) {
+                lastFailure = `${host}: ${error.message}`;
+            }
+
+            setTimeout(tryConnect, 100);
         };
 
         tryConnect();
@@ -566,8 +608,10 @@ async function launchChromium(options = {}) {
 
         // Wait for debug port
         console.error(`[*] Waiting for debug port ${debugPort}...`);
-        const versionInfo = await waitForDebugPort(debugPort, 30000);
+        const debugProbeTimeoutMs = getEnvInt('CHROME_DEBUG_PORT_TIMEOUT_MS', 30000);
+        const versionInfo = await waitForDebugPort(debugPort, debugProbeTimeoutMs);
         const wsUrl = versionInfo.webSocketDebuggerUrl;
+
         console.error(`[+] Chromium ready: ${wsUrl}`);
 
         fs.writeFileSync(path.join(outputDir, 'cdp_url.txt'), wsUrl);
@@ -1000,6 +1044,63 @@ async function loadOrInstallExtension(ext, extensions_dir = null) {
  * @param {Object} target - Puppeteer target object
  * @returns {Promise<Object>} - Object with target_is_bg, extension_id, manifest_version, etc.
  */
+const CHROME_EXTENSION_URL_PREFIX = 'chrome-extension://';
+const EXTENSION_BACKGROUND_TARGET_TYPES = new Set(['service_worker', 'background_page']);
+
+/**
+ * Parse extension ID from a target URL.
+ *
+ * @param {string|null|undefined} targetUrl - URL from Puppeteer target
+ * @returns {string|null} - Extension ID if URL is a chrome-extension URL
+ */
+function getExtensionIdFromUrl(targetUrl) {
+    if (!targetUrl || !targetUrl.startsWith(CHROME_EXTENSION_URL_PREFIX)) return null;
+    return targetUrl.slice(CHROME_EXTENSION_URL_PREFIX.length).split('/')[0] || null;
+}
+
+/**
+ * Filter extension list to entries with unpacked paths.
+ *
+ * @param {Array} extensions - Extension metadata list
+ * @returns {Array} - Extensions with unpacked_path
+ */
+function getValidInstalledExtensions(extensions) {
+    if (!Array.isArray(extensions) || extensions.length === 0) return [];
+    return extensions.filter(ext => ext?.unpacked_path);
+}
+
+async function tryGetExtensionContext(target, targetType) {
+    if (targetType === 'service_worker') return await target.worker();
+    return await target.page();
+}
+
+async function waitForExtensionTargetType(browser, extensionId, targetType, timeout) {
+    const target = await browser.waitForTarget(
+        candidate => candidate.type() === targetType &&
+            getExtensionIdFromUrl(candidate.url()) === extensionId,
+        { timeout }
+    );
+    return await tryGetExtensionContext(target, targetType);
+}
+
+/**
+ * Wait for a Puppeteer target handle for a specific extension id.
+ *
+ * @param {Object} browser - Puppeteer browser instance
+ * @param {string} extensionId - Extension ID
+ * @param {number} [timeout=30000] - Timeout in milliseconds
+ * @returns {Promise<Object>} - Puppeteer target
+ */
+async function waitForExtensionTargetHandle(browser, extensionId, timeout = 30000) {
+    return await browser.waitForTarget(
+        target =>
+            getExtensionIdFromUrl(target.url()) === extensionId &&
+            (EXTENSION_BACKGROUND_TARGET_TYPES.has(target.type()) ||
+                target.url().startsWith(CHROME_EXTENSION_URL_PREFIX)),
+        { timeout }
+    );
+}
+
 async function isTargetExtension(target) {
     let target_type;
     let target_ctx;
@@ -1021,12 +1122,12 @@ async function isTargetExtension(target) {
     }
 
     // Check if this is an extension background page or service worker
-    const is_chrome_extension = target_url?.startsWith('chrome-extension://');
+    const extension_id = getExtensionIdFromUrl(target_url);
+    const is_chrome_extension = Boolean(extension_id);
     const is_background_page = target_type === 'background_page';
     const is_service_worker = target_type === 'service_worker';
     const target_is_bg = is_chrome_extension && (is_background_page || is_service_worker);
 
-    let extension_id = null;
     let manifest_version = null;
     let manifest = null;
     let manifest_name = null;
@@ -1034,8 +1135,6 @@ async function isTargetExtension(target) {
 
     if (target_is_extension) {
         try {
-            extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
-
             if (target_ctx) {
                 manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
                 manifest_version = manifest?.manifest_version || null;
@@ -1075,6 +1174,7 @@ async function loadExtensionFromTarget(extensions, target) {
         target_url,
         extension_id,
         manifest_version,
+        manifest,
     } = await isTargetExtension(target);
 
     if (!(target_is_bg && extension_id && target_ctx)) {
@@ -1088,12 +1188,8 @@ async function loadExtensionFromTarget(extensions, target) {
         return null;
     }
 
-    // Load manifest from the extension context
-    let manifest = null;
-    try {
-        manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
-    } catch (err) {
-        console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err);
+    if (!manifest) {
+        console.error(`[❌] Failed to read manifest for extension ${extension_id}`);
         return null;
     }
 
@@ -1186,11 +1282,14 @@ async function installAllExtensions(extensions, extensions_dir = null) {
  * @param {Array} extensions - Array of extension metadata objects
  * @returns {Promise<Array>} - Array of loaded extension objects with connection handlers
  */
-async function loadAllExtensionsFromBrowser(browser, extensions) {
+async function loadAllExtensionsFromBrowser(browser, extensions, timeout = 30000) {
     console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`);
 
-    // Find loaded extensions at runtime by examining browser targets
-    for (const target of browser.targets()) {
+    for (const extension of getValidInstalledExtensions(extensions)) {
+        if (!extension.id) {
+            throw new Error(`Extension ${extension.name || extension.unpacked_path} missing id`);
+        }
+        const target = await waitForExtensionTargetHandle(browser, extension.id, timeout);
         await loadExtensionFromTarget(extensions, target);
     }
 
@@ -1230,12 +1329,8 @@ function loadExtensionManifest(unpacked_path) {
  */
 function getExtensionLaunchArgs(extensions) {
     console.warn('[DEPRECATED] getExtensionLaunchArgs is deprecated. Use puppeteer enableExtensions option instead.');
-    if (!extensions || extensions.length === 0) {
-        return [];
-    }
-
-    // Filter out extensions without unpacked_path first
-    const validExtensions = extensions.filter(ext => ext.unpacked_path);
+    const validExtensions = getValidInstalledExtensions(extensions);
+    if (validExtensions.length === 0) return [];
 
     const unpacked_paths = validExtensions.map(ext => ext.unpacked_path);
     // Use computed id (from path hash) for allowlisting, as that's what Chrome uses for unpacked extensions
@@ -1258,12 +1353,7 @@ function getExtensionLaunchArgs(extensions) {
  * @returns {Array<string>} - Array of extension unpacked paths
  */
 function getExtensionPaths(extensions) {
-    if (!extensions || extensions.length === 0) {
-        return [];
-    }
-    return extensions
-        .filter(ext => ext.unpacked_path)
-        .map(ext => ext.unpacked_path);
+    return getValidInstalledExtensions(extensions).map(ext => ext.unpacked_path);
 }
 
 /**
@@ -1284,43 +1374,68 @@ function getExtensionPaths(extensions) {
  * @returns {Promise<Object>} - Worker or Page context for the extension
  */
 async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
-    // Try to find service worker first (Manifest V3)
-    try {
-        const workerTarget = await browser.waitForTarget(
-            target => target.type() === 'service_worker' &&
-                target.url().includes(`chrome-extension://${extensionId}`),
-            { timeout }
-        );
-        const worker = await workerTarget.worker();
-        if (worker) return worker;
-    } catch (err) {
-        // No service worker found, try background page
-    }
-
-    // Try background page (Manifest V2)
-    try {
-        const backgroundTarget = await browser.waitForTarget(
-            target => target.type() === 'background_page' &&
-                target.url().includes(`chrome-extension://${extensionId}`),
-            { timeout }
-        );
-        const page = await backgroundTarget.page();
-        if (page) return page;
-    } catch (err) {
-        // No background page found
+    for (const targetType of EXTENSION_BACKGROUND_TARGET_TYPES) {
+        try {
+            const context = await waitForExtensionTargetType(browser, extensionId, targetType, timeout);
+            if (context) return context;
+        } catch (err) {
+            // Continue to next extension target type
+        }
     }
 
     // Try any extension page as fallback
-    const extTarget = await browser.waitForTarget(
-        target => target.url().startsWith(`chrome-extension://${extensionId}`),
-        { timeout }
-    );
+    const extTarget = await waitForExtensionTargetHandle(browser, extensionId, timeout);
 
     // Return worker or page depending on target type
-    if (extTarget.type() === 'service_worker') {
-        return await extTarget.worker();
+    return await tryGetExtensionContext(extTarget, extTarget.type());
+}
+
+/**
+ * Read extensions metadata from chrome session directory.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @returns {Array<Object>|null} - Parsed extensions metadata list or null if unavailable
+ */
+function readExtensionsMetadata(chromeSessionDir) {
+    const extensionsFile = path.join(path.resolve(chromeSessionDir), 'extensions.json');
+    if (!fs.existsSync(extensionsFile)) return null;
+    try {
+        const parsed = JSON.parse(fs.readFileSync(extensionsFile, 'utf8'));
+        return Array.isArray(parsed) ? parsed : null;
+    } catch (e) {
+        return null;
+    }
+}
+
+/**
+ * Wait for extensions metadata to be written by chrome launch hook.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @param {number} [timeoutMs=10000] - Timeout in milliseconds
+ * @param {number} [intervalMs=250] - Poll interval in milliseconds
+ * @returns {Promise<Array<Object>>} - Parsed extensions metadata list
+ * @throws {Error} - If metadata file is not available in time
+ */
+async function waitForExtensionsMetadata(chromeSessionDir, timeoutMs = 10000, intervalMs = 250) {
+    const startTime = Date.now();
+    while (Date.now() - startTime < timeoutMs) {
+        const metadata = readExtensionsMetadata(chromeSessionDir);
+        if (metadata && metadata.length > 0) return metadata;
+        await new Promise(resolve => setTimeout(resolve, intervalMs));
     }
-    return await extTarget.page();
+    throw new Error(`Timeout waiting for extensions metadata in ${chromeSessionDir}`);
+}
+
+/**
+ * Find extension metadata entry by name.
+ *
+ * @param {Array<Object>} extensions - Parsed extensions metadata list
+ * @param {string} extensionName - Extension name to match
+ * @returns {Object|null} - Matching extension metadata entry
+ */
+function findExtensionMetadataByName(extensions, extensionName) {
+    const wanted = (extensionName || '').toLowerCase();
+    return extensions.find(ext => (ext?.name || '').toLowerCase() === wanted) || null;
 }
 
 /**
@@ -1332,16 +1447,13 @@ async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
 function getExtensionTargets(browser) {
     return browser.targets()
         .filter(target =>
-            target.url().startsWith('chrome-extension://') ||
-            target.type() === 'service_worker' ||
-            target.type() === 'background_page'
+            getExtensionIdFromUrl(target.url()) ||
+            EXTENSION_BACKGROUND_TARGET_TYPES.has(target.type())
         )
         .map(target => ({
             type: target.type(),
             url: target.url(),
-            extensionId: target.url().includes('chrome-extension://')
-                ? target.url().split('chrome-extension://')[1]?.split('/')[0]
-                : null,
+            extensionId: getExtensionIdFromUrl(target.url()),
         }));
 }
 
@@ -1619,6 +1731,13 @@ async function installExtensionWithCache(extension, options = {}) {
 // Snapshot Hook Utilities (for CDP-based plugins like ssl, responses, dns)
 // ============================================================================
 
+const CHROME_SESSION_FILES = Object.freeze({
+    cdpUrl: 'cdp_url.txt',
+    targetId: 'target_id.txt',
+    chromePid: 'chrome.pid',
+    pageLoaded: 'page_loaded.txt',
+});
+
 /**
  * Parse command line arguments into an object.
  * Handles --key=value and --flag formats.
@@ -1637,26 +1756,189 @@ function parseArgs() {
 }
 
 /**
- * Wait for Chrome session files to be ready.
- * Polls for cdp_url.txt and target_id.txt in the chrome session directory.
+ * Resolve all session marker file paths for a chrome session directory.
  *
- * @param {string} chromeSessionDir - Path to chrome session directory (e.g., '../chrome')
- * @param {number} [timeoutMs=60000] - Timeout in milliseconds
- * @returns {Promise<boolean>} - True if files are ready, false if timeout
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @returns {{sessionDir: string, cdpFile: string, targetIdFile: string, chromePidFile: string, pageLoadedFile: string}}
+ */
+function getChromeSessionPaths(chromeSessionDir) {
+    const sessionDir = path.resolve(chromeSessionDir);
+    return {
+        sessionDir,
+        cdpFile: path.join(sessionDir, CHROME_SESSION_FILES.cdpUrl),
+        targetIdFile: path.join(sessionDir, CHROME_SESSION_FILES.targetId),
+        chromePidFile: path.join(sessionDir, CHROME_SESSION_FILES.chromePid),
+        pageLoadedFile: path.join(sessionDir, CHROME_SESSION_FILES.pageLoaded),
+    };
+}
+
+/**
+ * Read and trim a text file value if it exists.
+ *
+ * @param {string} filePath - File path
+ * @returns {string|null} - Trimmed file value or null
+ */
+function readSessionTextFile(filePath) {
+    if (!fs.existsSync(filePath)) return null;
+    const value = fs.readFileSync(filePath, 'utf8').trim();
+    return value || null;
+}
+
+/**
+ * Read the current chrome session state from marker files.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @returns {{sessionDir: string, cdpUrl: string|null, targetId: string|null, pid: number|null}}
+ */
+function readChromeSessionState(chromeSessionDir) {
+    const sessionPaths = getChromeSessionPaths(chromeSessionDir);
+    const cdpUrl = readSessionTextFile(sessionPaths.cdpFile);
+    const targetId = readSessionTextFile(sessionPaths.targetIdFile);
+    const rawPid = readSessionTextFile(sessionPaths.chromePidFile);
+    const parsedPid = rawPid ? parseInt(rawPid, 10) : NaN;
+    const pid = Number.isFinite(parsedPid) && parsedPid > 0 ? parsedPid : null;
+
+    return {
+        sessionDir: sessionPaths.sessionDir,
+        cdpUrl,
+        targetId,
+        pid,
+    };
+}
+
+/**
+ * Check if a chrome session state satisfies required fields.
+ *
+ * @param {{cdpUrl: string|null, targetId: string|null, pid: number|null}} state - Session state
+ * @param {Object} [options={}] - Validation options
+ * @param {boolean} [options.requireTargetId=false] - Require target ID marker
+ * @param {boolean} [options.requirePid=false] - Require PID marker
+ * @param {boolean} [options.requireAlivePid=false] - Require PID to be alive
+ * @returns {boolean} - True if state is valid
+ */
+function isValidChromeSessionState(state, options = {}) {
+    const {
+        requireTargetId = false,
+        requirePid = false,
+        requireAlivePid = false,
+    } = options;
+
+    if (!state?.cdpUrl) return false;
+    if (requireTargetId && !state.targetId) return false;
+    if ((requirePid || requireAlivePid) && !state.pid) return false;
+    if (requireAlivePid) {
+        try {
+            process.kill(state.pid, 0);
+        } catch (e) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Wait for a chrome session state to satisfy required fields.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @param {Object} [options={}] - Wait/validation options
+ * @param {number} [options.timeoutMs=60000] - Timeout in milliseconds
+ * @param {number} [options.intervalMs=100] - Poll interval in milliseconds
+ * @param {boolean} [options.requireTargetId=false] - Require target ID marker
+ * @param {boolean} [options.requirePid=false] - Require PID marker
+ * @param {boolean} [options.requireAlivePid=false] - Require PID to be alive
+ * @returns {Promise<{sessionDir: string, cdpUrl: string|null, targetId: string|null, pid: number|null}|null>}
  */
-async function waitForChromeSession(chromeSessionDir, timeoutMs = 60000) {
-    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
-    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
+async function waitForChromeSessionState(chromeSessionDir, options = {}) {
+    const {
+        timeoutMs = 60000,
+        intervalMs = 100,
+        requireTargetId = false,
+        requirePid = false,
+        requireAlivePid = false,
+    } = options;
     const startTime = Date.now();
 
     while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            return true;
+        const state = readChromeSessionState(chromeSessionDir);
+        if (isValidChromeSessionState(state, { requireTargetId, requirePid, requireAlivePid })) {
+            return state;
         }
-        await new Promise(resolve => setTimeout(resolve, 100));
+        await new Promise(resolve => setTimeout(resolve, intervalMs));
+    }
+
+    return null;
+}
+
+/**
+ * Ensure puppeteer module was passed in by callers.
+ *
+ * @param {Object} puppeteer - Puppeteer module
+ * @param {string} callerName - Caller function name for errors
+ * @returns {Object} - Puppeteer module
+ * @throws {Error} - If puppeteer is missing
+ */
+function requirePuppeteerModule(puppeteer, callerName) {
+    if (!puppeteer) {
+        throw new Error(`puppeteer module must be passed to ${callerName}()`);
+    }
+    return puppeteer;
+}
+
+/**
+ * Resolve puppeteer module from installed dependencies.
+ *
+ * @returns {Object} - Loaded puppeteer module
+ * @throws {Error} - If no puppeteer package is installed
+ */
+function resolvePuppeteerModule() {
+    for (const moduleName of ['puppeteer-core', 'puppeteer']) {
+        try {
+            return require(moduleName);
+        } catch (e) {}
+    }
+    throw new Error('Missing puppeteer dependency (need puppeteer-core or puppeteer)');
+}
+
+/**
+ * Connect to a running browser, run an operation, and always disconnect.
+ *
+ * @param {Object} options - Connection options
+ * @param {Object} options.puppeteer - Puppeteer module
+ * @param {string} options.browserWSEndpoint - Browser websocket endpoint
+ * @param {Object} [options.connectOptions={}] - Additional puppeteer connect options
+ * @param {Function} operation - Async callback receiving the browser
+ * @returns {Promise<*>} - Operation return value
+ */
+async function withConnectedBrowser(options, operation) {
+    const {
+        puppeteer,
+        browserWSEndpoint,
+        connectOptions = {},
+    } = options;
+
+    const browser = await puppeteer.connect({
+        browserWSEndpoint,
+        ...connectOptions,
+    });
+    try {
+        return await operation(browser);
+    } finally {
+        await browser.disconnect();
     }
+}
 
-    return false;
+/**
+ * Wait for Chrome session files to be ready.
+ * Polls for cdp_url.txt and optionally target_id.txt in the chrome session directory.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory (e.g., '../chrome')
+ * @param {number} [timeoutMs=60000] - Timeout in milliseconds
+ * @param {boolean} [requireTargetId=true] - Whether target_id.txt must exist
+ * @returns {Promise<boolean>} - True if files are ready, false if timeout
+ */
+async function waitForChromeSession(chromeSessionDir, timeoutMs = 60000, requireTargetId = true) {
+    const state = await waitForChromeSessionState(chromeSessionDir, { timeoutMs, requireTargetId });
+    return Boolean(state);
 }
 
 /**
@@ -1666,11 +1948,8 @@ async function waitForChromeSession(chromeSessionDir, timeoutMs = 60000) {
  * @returns {string|null} - CDP URL or null if not found
  */
 function readCdpUrl(chromeSessionDir) {
-    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
+    const { cdpFile } = getChromeSessionPaths(chromeSessionDir);
+    return readSessionTextFile(cdpFile);
 }
 
 /**
@@ -1680,11 +1959,123 @@ function readCdpUrl(chromeSessionDir) {
  * @returns {string|null} - Target ID or null if not found
  */
 function readTargetId(chromeSessionDir) {
-    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
-    if (fs.existsSync(targetIdFile)) {
-        return fs.readFileSync(targetIdFile, 'utf8').trim();
+    const { targetIdFile } = getChromeSessionPaths(chromeSessionDir);
+    return readSessionTextFile(targetIdFile);
+}
+
+/**
+ * Read Chrome PID from chrome session directory.
+ *
+ * @param {string} chromeSessionDir - Path to chrome session directory
+ * @returns {number|null} - PID or null if invalid/missing
+ */
+function readChromePid(chromeSessionDir) {
+    return readChromeSessionState(chromeSessionDir).pid;
+}
+
+/**
+ * Resolve the active crawl-level Chrome session.
+ *
+ * @param {string} [crawlBaseDir='.'] - Crawl root directory
+ * @returns {{cdpUrl: string, pid: number, crawlChromeDir: string}}
+ * @throws {Error} - If session files are missing/invalid or process is dead
+ */
+function getCrawlChromeSession(crawlBaseDir = '.') {
+    const crawlChromeDir = path.join(path.resolve(crawlBaseDir), 'chrome');
+    const state = readChromeSessionState(crawlChromeDir);
+    if (!isValidChromeSessionState(state, { requirePid: true, requireAlivePid: true })) {
+        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
     }
-    return null;
+    return { cdpUrl: state.cdpUrl, pid: state.pid, crawlChromeDir };
+}
+
+/**
+ * Wait for an active crawl-level Chrome session.
+ *
+ * @param {number} timeoutMs - Timeout in milliseconds
+ * @param {Object} [options={}] - Optional settings
+ * @param {number} [options.intervalMs=250] - Poll interval in ms
+ * @param {string} [options.crawlBaseDir='.'] - Crawl root directory
+ * @returns {Promise<{cdpUrl: string, pid: number, crawlChromeDir: string}>}
+ * @throws {Error} - If timeout reached
+ */
+async function waitForCrawlChromeSession(timeoutMs, options = {}) {
+    const intervalMs = options.intervalMs || 250;
+    const crawlBaseDir = options.crawlBaseDir || '.';
+    const crawlChromeDir = path.join(path.resolve(crawlBaseDir), 'chrome');
+    const state = await waitForChromeSessionState(crawlChromeDir, {
+        timeoutMs,
+        intervalMs,
+        requirePid: true,
+        requireAlivePid: true,
+    });
+    if (!state) throw new Error(CHROME_SESSION_REQUIRED_ERROR);
+    return { cdpUrl: state.cdpUrl, pid: state.pid, crawlChromeDir };
+}
+
+/**
+ * Open a new tab in an existing Chrome session.
+ *
+ * @param {Object} options - Tab open options
+ * @param {string} options.cdpUrl - Browser CDP websocket URL
+ * @param {Object} options.puppeteer - Puppeteer module
+ * @returns {Promise<{targetId: string}>}
+ */
+async function openTabInChromeSession(options = {}) {
+    const { cdpUrl, puppeteer } = options;
+    if (!cdpUrl) {
+        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
+    }
+    const puppeteerModule = requirePuppeteerModule(puppeteer, 'openTabInChromeSession');
+
+    return withConnectedBrowser(
+        {
+            puppeteer: puppeteerModule,
+            browserWSEndpoint: cdpUrl,
+            connectOptions: { defaultViewport: null },
+        },
+        async (browser) => {
+        const page = await browser.newPage();
+        const targetId = page?.target()?._targetId;
+        if (!targetId) {
+            throw new Error('Failed to resolve target ID for new tab');
+        }
+        return { targetId };
+        }
+    );
+}
+
+/**
+ * Close a tab by target ID in an existing Chrome session.
+ *
+ * @param {Object} options - Tab close options
+ * @param {string} options.cdpUrl - Browser CDP websocket URL
+ * @param {string} options.targetId - Target ID to close
+ * @param {Object} options.puppeteer - Puppeteer module
+ * @returns {Promise<boolean>} - True if a tab was found and closed
+ */
+async function closeTabInChromeSession(options = {}) {
+    const { cdpUrl, targetId, puppeteer } = options;
+    if (!cdpUrl || !targetId) {
+        return false;
+    }
+    const puppeteerModule = requirePuppeteerModule(puppeteer, 'closeTabInChromeSession');
+
+    return withConnectedBrowser(
+        {
+            puppeteer: puppeteerModule,
+            browserWSEndpoint: cdpUrl,
+        },
+        async (browser) => {
+        const pages = await browser.pages();
+        const page = pages.find(p => p.target()?._targetId === targetId);
+        if (!page) {
+            return false;
+        }
+        await page.close();
+        return true;
+        }
+    );
 }
 
 /**
@@ -1697,7 +2088,9 @@ function readTargetId(chromeSessionDir) {
  * @param {Object} options - Connection options
  * @param {string} [options.chromeSessionDir='../chrome'] - Path to chrome session directory
  * @param {number} [options.timeoutMs=60000] - Timeout for waiting
- * @param {Object} [options.puppeteer] - Puppeteer module (must be passed in)
+ * @param {boolean} [options.requireTargetId=true] - Require target_id.txt in session dir
+ * @param {Object} [options.puppeteer] - Puppeteer module (preferred explicit form)
+ * @param {Object} [options.puppeteerModule] - Backward-compatible puppeteer module key
  * @returns {Promise<Object>} - { browser, page, targetId, cdpUrl }
  * @throws {Error} - If connection fails or page not found
  */
@@ -1705,51 +2098,52 @@ async function connectToPage(options = {}) {
     const {
         chromeSessionDir = '../chrome',
         timeoutMs = 60000,
+        requireTargetId = true,
         puppeteer,
+        puppeteerModule,
     } = options;
 
-    if (!puppeteer) {
-        throw new Error('puppeteer module must be passed to connectToPage()');
-    }
-
-    // Wait for chrome session to be ready
-    const sessionReady = await waitForChromeSession(chromeSessionDir, timeoutMs);
-    if (!sessionReady) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    // Read session files
-    const cdpUrl = readCdpUrl(chromeSessionDir);
-    if (!cdpUrl) {
+    // Support both key names and fall back to local resolution for compatibility
+    // with older callers that may omit explicit module injection.
+    const resolvedPuppeteer = puppeteer || puppeteerModule || resolvePuppeteerModule();
+    const state = await waitForChromeSessionState(chromeSessionDir, { timeoutMs, requireTargetId });
+    if (!state) {
         throw new Error(CHROME_SESSION_REQUIRED_ERROR);
     }
 
-    const targetId = readTargetId(chromeSessionDir);
-
     // Connect to browser
-    const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
+    const browser = await resolvedPuppeteer.connect({ browserWSEndpoint: state.cdpUrl });
 
-    // Find the target page
-    const pages = await browser.pages();
-    let page = null;
+    try {
+        // Find the target page
+        const pages = await browser.pages();
+        let page = null;
+
+        if (state.targetId) {
+            page = pages.find(p => {
+                const target = p.target();
+                return target && target._targetId === state.targetId;
+            });
+        }
 
-    if (targetId) {
-        page = pages.find(p => {
-            const target = p.target();
-            return target && target._targetId === targetId;
-        });
-    }
+        // Fallback to last page if target not found
+        if (!page) {
+            page = pages[pages.length - 1];
+        }
 
-    // Fallback to last page if target not found
-    if (!page) {
-        page = pages[pages.length - 1];
-    }
+        if (!page) {
+            throw new Error('No page found in browser');
+        }
 
-    if (!page) {
-        throw new Error('No page found in browser');
+        return { browser, page, targetId: state.targetId, cdpUrl: state.cdpUrl };
+    } catch (error) {
+        // connectToPage hands ownership of browser to callers on success;
+        // disconnect here only for failures that happen before handoff.
+        try {
+            await browser.disconnect();
+        } catch (disconnectError) {}
+        throw error;
     }
-
-    return { browser, page, targetId, cdpUrl };
 }
 
 /**
@@ -1763,16 +2157,16 @@ async function connectToPage(options = {}) {
  * @throws {Error} - If timeout waiting for navigation
  */
 async function waitForPageLoaded(chromeSessionDir, timeoutMs = 120000, postLoadDelayMs = 0) {
-    const pageLoadedMarker = path.join(chromeSessionDir, 'page_loaded.txt');
+    const { pageLoadedFile } = getChromeSessionPaths(chromeSessionDir);
     const pollInterval = 100;
     let waitTime = 0;
 
-    while (!fs.existsSync(pageLoadedMarker) && waitTime < timeoutMs) {
+    while (!fs.existsSync(pageLoadedFile) && waitTime < timeoutMs) {
         await new Promise(resolve => setTimeout(resolve, pollInterval));
         waitTime += pollInterval;
     }
 
-    if (!fs.existsSync(pageLoadedMarker)) {
+    if (!fs.existsSync(pageLoadedFile)) {
         throw new Error('Timeout waiting for navigation (chrome_navigate did not complete)');
     }
 
@@ -1782,6 +2176,40 @@ async function waitForPageLoaded(chromeSessionDir, timeoutMs = 120000, postLoadD
     }
 }
 
+/**
+ * Read all browser cookies from a running Chrome CDP debug port.
+ * Uses existing CDP bootstrap helpers and puppeteer connection logic.
+ *
+ * @param {number} port - Chrome remote debugging port
+ * @param {Object} [options={}] - Optional settings
+ * @param {number} [options.timeoutMs=10000] - Timeout waiting for debug port
+ * @returns {Promise<Array<Object>>} - Array of cookie objects
+ */
+async function getCookiesViaCdp(port, options = {}) {
+    const timeoutMs = options.timeoutMs || getEnvInt('CDP_COOKIE_TIMEOUT_MS', 10000);
+    const versionInfo = await waitForDebugPort(port, timeoutMs);
+    const browserWSEndpoint = versionInfo?.webSocketDebuggerUrl;
+    if (!browserWSEndpoint) {
+        throw new Error(`No webSocketDebuggerUrl from Chrome debug port ${port}`);
+    }
+    const puppeteerModule = resolvePuppeteerModule();
+
+    return withConnectedBrowser(
+        {
+            puppeteer: puppeteerModule,
+            browserWSEndpoint,
+        },
+        async (browser) => {
+        const pages = await browser.pages();
+        const page = pages[pages.length - 1] || await browser.newPage();
+        const session = await page.target().createCDPSession();
+        await session.send('Network.enable');
+        const result = await session.send('Network.getAllCookies');
+        return result?.cookies || [];
+        }
+    );
+}
+
 // Export all functions
 module.exports = {
     // Environment helpers
@@ -1816,10 +2244,14 @@ module.exports = {
     loadExtensionFromTarget,
     installAllExtensions,
     loadAllExtensionsFromBrowser,
+    waitForExtensionTargetHandle,
     // New puppeteer best-practices helpers
     getExtensionPaths,
     waitForExtensionTarget,
     getExtensionTargets,
+    readExtensionsMetadata,
+    waitForExtensionsMetadata,
+    findExtensionMetadataByName,
     // Shared path utilities (single source of truth for Python/JS)
     getMachineType,
     getLibDir,
@@ -1835,8 +2267,14 @@ module.exports = {
     waitForChromeSession,
     readCdpUrl,
     readTargetId,
+    readChromePid,
+    getCrawlChromeSession,
+    waitForCrawlChromeSession,
+    openTabInChromeSession,
+    closeTabInChromeSession,
     connectToPage,
     waitForPageLoaded,
+    getCookiesViaCdp,
 };
 
 // CLI usage
@@ -1851,6 +2289,8 @@ if (require.main === module) {
         console.log('  installChromium           Install Chromium via @puppeteer/browsers');
         console.log('  installPuppeteerCore      Install puppeteer-core npm package');
         console.log('  launchChromium            Launch Chrome with CDP debugging');
+        console.log('  getCookiesViaCdp <port>  Read browser cookies via CDP port');
+        console.log('  getCrawlChromeSession    Resolve active crawl chrome session');
         console.log('  killChrome <pid>          Kill Chrome process by PID');
         console.log('  killZombieChrome          Clean up zombie Chrome processes');
         console.log('');
@@ -1939,6 +2379,25 @@ if (require.main === module) {
                     break;
                 }
 
+                case 'getCookiesViaCdp': {
+                    const [portStr] = commandArgs;
+                    const port = parseInt(portStr, 10);
+                    if (isNaN(port) || port <= 0) {
+                        console.error('Invalid port');
+                        process.exit(1);
+                    }
+                    const cookies = await getCookiesViaCdp(port);
+                    console.log(JSON.stringify(cookies));
+                    break;
+                }
+
+                case 'getCrawlChromeSession': {
+                    const [crawlBaseDir] = commandArgs;
+                    const session = getCrawlChromeSession(crawlBaseDir || getEnv('CRAWL_DIR', '.'));
+                    console.log(JSON.stringify(session));
+                    break;
+                }
+
                 case 'killChrome': {
                     const [pidStr, outputDir] = commandArgs;
                     const pid = parseInt(pidStr, 10);
@@ -1986,6 +2445,18 @@ if (require.main === module) {
                     break;
                 }
 
+                case 'waitForExtensionsMetadata': {
+                    const [chromeSessionDir = '.', timeoutMsStr = '10000'] = commandArgs;
+                    const timeoutMs = parseInt(timeoutMsStr, 10);
+                    if (isNaN(timeoutMs) || timeoutMs <= 0) {
+                        console.error('Invalid timeoutMs');
+                        process.exit(1);
+                    }
+                    const metadata = await waitForExtensionsMetadata(chromeSessionDir, timeoutMs);
+                    console.log(JSON.stringify(metadata));
+                    break;
+                }
+
                 case 'getMachineType': {
                     console.log(getMachineType());
                     break;
diff --git a/abx_plugins/plugins/chrome/extract_cookies.js b/abx_plugins/plugins/chrome/extract_cookies.js
index c23515d..80c7b53 100644
--- a/abx_plugins/plugins/chrome/extract_cookies.js
+++ b/abx_plugins/plugins/chrome/extract_cookies.js
@@ -27,6 +27,7 @@ const {
     launchChromium,
     killChrome,
     getEnv,
+    getCookiesViaCdp,
 } = require('./chrome_utils.js');
 
 /**
@@ -146,75 +147,11 @@ async function main() {
         console.error(`[*] Chrome launched (PID: ${chromePid})`);
         console.error(`[*] CDP URL: ${cdpUrl}`);
 
-        // Connect to CDP and get cookies
-        const http = require('http');
-
-        // Use CDP directly via HTTP to get all cookies
-        const getCookies = () => {
-            return new Promise((resolve, reject) => {
-                const req = http.request(
-                    {
-                        hostname: '127.0.0.1',
-                        port: port,
-                        path: '/json/list',
-                        method: 'GET',
-                    },
-                    (res) => {
-                        let data = '';
-                        res.on('data', (chunk) => (data += chunk));
-                        res.on('end', () => {
-                            try {
-                                const targets = JSON.parse(data);
-                                // Find a page target
-                                const pageTarget = targets.find(t => t.type === 'page') || targets[0];
-                                if (!pageTarget) {
-                                    reject(new Error('No page target found'));
-                                    return;
-                                }
-
-                                // Connect via WebSocket and send CDP command
-                                const WebSocket = require('ws');
-                                const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
-
-                                ws.on('open', () => {
-                                    ws.send(JSON.stringify({
-                                        id: 1,
-                                        method: 'Network.getAllCookies',
-                                    }));
-                                });
-
-                                ws.on('message', (message) => {
-                                    const response = JSON.parse(message);
-                                    if (response.id === 1) {
-                                        ws.close();
-                                        if (response.result && response.result.cookies) {
-                                            resolve(response.result.cookies);
-                                        } else {
-                                            reject(new Error('Failed to get cookies: ' + JSON.stringify(response)));
-                                        }
-                                    }
-                                });
-
-                                ws.on('error', (err) => {
-                                    reject(err);
-                                });
-                            } catch (e) {
-                                reject(e);
-                            }
-                        });
-                    }
-                );
-
-                req.on('error', reject);
-                req.end();
-            });
-        };
-
         // Wait a moment for the browser to fully initialize
         await new Promise(r => setTimeout(r, 2000));
 
         console.error('[*] Fetching cookies via CDP...');
-        const cookies = await getCookies();
+        const cookies = await getCookiesViaCdp(port, { timeoutMs: 20000 });
 
         console.error(`[+] Retrieved ${cookies.length} cookies`);
 
diff --git a/abx_plugins/plugins/chrome/on_Crawl__70_chrome_install.py b/abx_plugins/plugins/chrome/on_Crawl__70_chrome_install.py
index 16c3371..cc40ff9 100755
--- a/abx_plugins/plugins/chrome/on_Crawl__70_chrome_install.py
+++ b/abx_plugins/plugins/chrome/on_Crawl__70_chrome_install.py
@@ -18,7 +18,7 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
@@ -26,21 +26,26 @@
 
 def main():
     # Check if Chrome is enabled
-    chrome_enabled = os.environ.get('CHROME_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off')
+    chrome_enabled = os.environ.get("CHROME_ENABLED", "true").lower() not in (
+        "false",
+        "0",
+        "no",
+        "off",
+    )
     if not chrome_enabled:
         sys.exit(0)
 
     record = {
-        'type': 'Binary',
-        'name': 'chromium',
-        'binproviders': 'puppeteer,env',
-        'overrides': {
-            'puppeteer': ['chromium@latest', '--install-deps'],
+        "type": "Binary",
+        "name": "chromium",
+        "binproviders": "puppeteer,env",
+        "overrides": {
+            "puppeteer": ["chromium@latest", "--install-deps"],
         },
     }
     print(json.dumps(record))
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js b/abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
index 8c41039..04d614e 100755
--- a/abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
+++ b/abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
@@ -27,7 +27,16 @@ const { execSync } = require('child_process');
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 
 const puppeteer = require('puppeteer');
-const { getEnv, getEnvInt } = require('./chrome_utils.js');
+const {
+    getEnv,
+    getEnvInt,
+    readCdpUrl,
+    readTargetId,
+    waitForExtensionsMetadata,
+    waitForCrawlChromeSession,
+    openTabInChromeSession,
+    closeTabInChromeSession,
+} = require('./chrome_utils.js');
 
 // Extractor metadata
 const PLUGIN_NAME = 'chrome_tab';
@@ -39,7 +48,6 @@ if (!fs.existsSync(OUTPUT_DIR)) {
 }
 process.chdir(OUTPUT_DIR);
 const CHROME_SESSION_DIR = '.';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
 
 let finalStatus = 'failed';
 let finalOutput = '';
@@ -85,114 +93,22 @@ async function cleanup(signal) {
         console.error(`\nReceived ${signal}, closing chrome tab...`);
     }
     try {
-        const cdpFile = path.join(OUTPUT_DIR, 'cdp_url.txt');
-        const targetIdFile = path.join(OUTPUT_DIR, 'target_id.txt');
-
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            const cdpUrl = fs.readFileSync(cdpFile, 'utf8').trim();
-            const targetId = fs.readFileSync(targetIdFile, 'utf8').trim();
-
-            const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-            const pages = await browser.pages();
-            const page = pages.find(p => p.target()._targetId === targetId);
-
-            if (page) {
-                await page.close();
-            }
-            browser.disconnect();
-        }
+        const cdpUrl = readCdpUrl(OUTPUT_DIR);
+        const targetId = readTargetId(OUTPUT_DIR);
+        await closeTabInChromeSession({ cdpUrl, targetId, puppeteer });
     } catch (e) {
         // Best effort
     }
-    emitResult();
-    process.exit(finalStatus === 'succeeded' ? 0 : 1);
+    const hasTargetId = Boolean(readTargetId(OUTPUT_DIR));
+    const status = hasTargetId ? 'succeeded' : finalStatus;
+    emitResult(status);
+    process.exit(status === 'succeeded' ? 0 : 1);
 }
 
 // Register signal handlers
 process.on('SIGTERM', () => cleanup('SIGTERM'));
 process.on('SIGINT', () => cleanup('SIGINT'));
 
-// Try to find the crawl's Chrome session
-function getCrawlChromeSession() {
-    const crawlBaseDir = getEnv('CRAWL_DIR', '.');
-    const crawlChromeDir = path.join(path.resolve(crawlBaseDir), 'chrome');
-    const cdpFile = path.join(crawlChromeDir, 'cdp_url.txt');
-    const pidFile = path.join(crawlChromeDir, 'chrome.pid');
-
-    if (!fs.existsSync(cdpFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    if (!fs.existsSync(pidFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    const cdpUrl = fs.readFileSync(cdpFile, 'utf-8').trim();
-    const pid = parseInt(fs.readFileSync(pidFile, 'utf-8').trim(), 10);
-    if (!cdpUrl) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    if (!pid || Number.isNaN(pid)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    // Verify the process is still running
-    try {
-        process.kill(pid, 0);  // Signal 0 = check if process exists
-    } catch (e) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    return { cdpUrl, pid };
-}
-
-async function waitForCrawlChromeSession(timeoutMs, intervalMs = 250) {
-    const startTime = Date.now();
-    let lastError = null;
-
-    while (Date.now() - startTime < timeoutMs) {
-        try {
-            return getCrawlChromeSession();
-        } catch (e) {
-            lastError = e;
-        }
-        await new Promise(resolve => setTimeout(resolve, intervalMs));
-    }
-
-    if (lastError) {
-        throw lastError;
-    }
-    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-}
-
-// Create a new tab in an existing Chrome session
-async function createTabInExistingChrome(cdpUrl, url, pid) {
-    console.log(`[*] Connecting to existing Chrome session: ${cdpUrl}`);
-
-    // Connect Puppeteer to the running Chrome
-    const browser = await puppeteer.connect({
-        browserWSEndpoint: cdpUrl,
-        defaultViewport: null,
-    });
-
-    // Create a new tab for this snapshot
-    const page = await browser.newPage();
-
-    // Get the page target ID
-    const target = page.target();
-    const targetId = target._targetId;
-
-    // Write session info
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'cdp_url.txt'), cdpUrl);
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'chrome.pid'), String(pid));
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'target_id.txt'), targetId);
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'url.txt'), url);
-
-    // Disconnect Puppeteer (Chrome and tab stay alive)
-    browser.disconnect();
-
-    return { success: true, output: OUTPUT_DIR, cdpUrl, targetId, pid };
-}
-
 async function main() {
     const args = parseArgs();
     const url = args.url;
@@ -222,20 +138,41 @@ async function main() {
 
         // Try to use existing crawl Chrome session (wait for readiness)
         const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60)));
-        const crawlSession = await waitForCrawlChromeSession(timeoutSeconds * 1000);
+        const crawlSession = await waitForCrawlChromeSession(timeoutSeconds * 1000, {
+            crawlBaseDir: getEnv('CRAWL_DIR', '.'),
+        });
         console.log(`[*] Found existing Chrome session from crawl ${crawlId}`);
-        const result = await createTabInExistingChrome(crawlSession.cdpUrl, url, crawlSession.pid);
 
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            console.log(`[+] Chrome tab ready`);
-            console.log(`[+] CDP URL: ${result.cdpUrl}`);
-            console.log(`[+] Page target ID: ${result.targetId}`);
-        } else {
-            status = 'failed';
-            error = result.error;
+        const { targetId } = await openTabInChromeSession({
+            cdpUrl: crawlSession.cdpUrl,
+            puppeteer,
+        });
+
+        fs.writeFileSync(path.join(OUTPUT_DIR, 'cdp_url.txt'), crawlSession.cdpUrl);
+        fs.writeFileSync(path.join(OUTPUT_DIR, 'chrome.pid'), String(crawlSession.pid));
+        fs.writeFileSync(path.join(OUTPUT_DIR, 'target_id.txt'), targetId);
+        fs.writeFileSync(path.join(OUTPUT_DIR, 'url.txt'), url);
+
+        // Mark success immediately after tab creation so SIGTERM cleanup exits 0.
+        status = 'succeeded';
+        output = OUTPUT_DIR;
+        finalStatus = status;
+        finalOutput = output;
+        finalError = '';
+        cmdVersion = version || '';
+
+        try {
+            const extensionsMetadata = await waitForExtensionsMetadata(crawlSession.crawlChromeDir, 10000);
+            fs.writeFileSync(
+                path.join(OUTPUT_DIR, 'extensions.json'),
+                JSON.stringify(extensionsMetadata, null, 2)
+            );
+        } catch (err) {
+            // Extension metadata is optional for non-extension snapshots.
         }
+        console.log(`[+] Chrome tab ready`);
+        console.log(`[+] CDP URL: ${crawlSession.cdpUrl}`);
+        console.log(`[+] Page target ID: ${targetId}`);
     } catch (e) {
         error = `${e.name}: ${e.message}`;
         status = 'failed';
diff --git a/abx_plugins/plugins/chrome/on_Snapshot__30_chrome_navigate.js b/abx_plugins/plugins/chrome/on_Snapshot__30_chrome_navigate.js
index e514493..2d09e3e 100644
--- a/abx_plugins/plugins/chrome/on_Snapshot__30_chrome_navigate.js
+++ b/abx_plugins/plugins/chrome/on_Snapshot__30_chrome_navigate.js
@@ -20,6 +20,9 @@ const path = require('path');
 // Add NODE_MODULES_DIR to module resolution paths if set
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer');
+const {
+    connectToPage,
+} = require('./chrome_utils.js');
 
 const PLUGIN_NAME = 'chrome_navigate';
 const CHROME_SESSION_DIR = '.';
@@ -30,7 +33,6 @@ if (!fs.existsSync(OUTPUT_DIR)) {
     fs.mkdirSync(OUTPUT_DIR, { recursive: true });
 }
 process.chdir(OUTPUT_DIR);
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
 
 function parseArgs() {
     const args = {};
@@ -57,34 +59,6 @@ function getEnvFloat(name, defaultValue = 0) {
     return isNaN(val) ? defaultValue : val;
 }
 
-async function waitForChromeTabOpen(timeoutMs = 60000) {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (!fs.existsSync(cdpFile)) return null;
-    return fs.readFileSync(cdpFile, 'utf8').trim();
-}
-
-function getPageId() {
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    if (!fs.existsSync(targetIdFile)) return null;
-    return fs.readFileSync(targetIdFile, 'utf8').trim();
-}
-
 function getWaitCondition() {
     const waitFor = getEnv('CHROME_WAIT_FOR', 'networkidle2').toLowerCase();
     const valid = ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'];
@@ -95,34 +69,23 @@ function sleep(ms) {
     return new Promise(resolve => setTimeout(resolve, ms));
 }
 
-async function navigate(url, cdpUrl) {
+async function navigate(url) {
     const timeout = (getEnvInt('CHROME_PAGELOAD_TIMEOUT') || getEnvInt('CHROME_TIMEOUT') || getEnvInt('TIMEOUT', 60)) * 1000;
     const delayAfterLoad = getEnvFloat('CHROME_DELAY_AFTER_LOAD', 0) * 1000;
     const waitUntil = getWaitCondition();
-    const targetId = getPageId();
 
     let browser = null;
     const navStartTime = Date.now();
 
     try {
-        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        const pages = await browser.pages();
-        if (pages.length === 0) {
-            return { success: false, error: 'No pages found in browser', waitUntil, elapsed: Date.now() - navStartTime };
-        }
-
-        // Find page by target ID if available
-        let page = null;
-        if (targetId) {
-            page = pages.find(p => {
-                const target = p.target();
-                return target && target._targetId === targetId;
-            });
-        }
-        if (!page) {
-            page = pages[pages.length - 1];
-        }
+        const conn = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs: timeout,
+            requireTargetId: true,
+            puppeteer,
+        });
+        browser = conn.browser;
+        const page = conn.page;
 
         // Navigate
         console.log(`Navigating to ${url} (wait: ${waitUntil}, timeout: ${timeout}ms)`);
@@ -179,20 +142,7 @@ async function main() {
     let output = null;
     let error = '';
 
-    // Wait for chrome tab to be open (up to 60s)
-    const tabOpen = await waitForChromeTabOpen(60000);
-    if (!tabOpen) {
-        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
-        process.exit(1);
-    }
-
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
-        process.exit(1);
-    }
-
-    const result = await navigate(url, cdpUrl);
+    const result = await navigate(url);
 
     if (result.success) {
         status = 'succeeded';
diff --git a/abx_plugins/plugins/chrome/tests/chrome_test_helpers.py b/abx_plugins/plugins/chrome/tests/chrome_test_helpers.py
index f80fe61..6909dbd 100644
--- a/abx_plugins/plugins/chrome/tests/chrome_test_helpers.py
+++ b/abx_plugins/plugins/chrome/tests/chrome_test_helpers.py
@@ -60,13 +60,13 @@
 import platform
 import signal
 import ssl
+import fcntl
 import subprocess
 import sys
 import threading
 import time
 import urllib.parse
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
-from datetime import datetime
 from pathlib import Path
 from typing import Tuple, Optional, List, Dict, Any
 from contextlib import contextmanager
@@ -81,47 +81,62 @@
 PLUGINS_ROOT = CHROME_PLUGIN_DIR.parent
 
 # Hook script locations
-CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__70_chrome_install.py'
-CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__90_chrome_launch.bg.js'
-CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__10_chrome_tab.bg.js'
-CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
-CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'
-PUPPETEER_BINARY_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Binary__12_puppeteer_install.py'
-PUPPETEER_CRAWL_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Crawl__60_puppeteer_install.py'
-NPM_BINARY_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__10_npm_install.py'
+CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / "on_Crawl__70_chrome_install.py"
+CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / "on_Crawl__90_chrome_launch.bg.js"
+CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / "on_Snapshot__10_chrome_tab.bg.js"
+_CHROME_NAVIGATE_HOOK = next(
+    CHROME_PLUGIN_DIR.glob("on_Snapshot__*_chrome_navigate.*"), None
+)
+if _CHROME_NAVIGATE_HOOK is None:
+    raise FileNotFoundError(
+        f"Could not find chrome navigate hook in {CHROME_PLUGIN_DIR}"
+    )
+CHROME_NAVIGATE_HOOK = _CHROME_NAVIGATE_HOOK
+CHROME_UTILS = CHROME_PLUGIN_DIR / "chrome_utils.js"
+PUPPETEER_BINARY_HOOK = (
+    PLUGINS_ROOT / "puppeteer" / "on_Binary__12_puppeteer_install.py"
+)
+PUPPETEER_CRAWL_HOOK = PLUGINS_ROOT / "puppeteer" / "on_Crawl__60_puppeteer_install.py"
+NPM_BINARY_HOOK = PLUGINS_ROOT / "npm" / "on_Binary__10_npm_install.py"
 
 
 # Prefer root-level URL fixtures if they exist, otherwise fall back to a local server.
 _ROOT_URL_FIXTURE_NAMES = (
-    'local_test_urls',
-    'test_urls',
-    'deterministic_urls',
-    'local_http_url',
-    'local_url',
-    'test_url',
+    "local_test_urls",
+    "test_urls",
+    "deterministic_urls",
+    "local_http_url",
+    "local_url",
+    "test_url",
 )
 
 
 class _DeterministicTestRequestHandler(BaseHTTPRequestHandler):
     """HTTP handler that serves predictable pages for Chrome-dependent tests."""
 
-    server_version = 'ABXDeterministicHTTP/1.0'
+    server_version = "ABXDeterministicHTTP/1.0"
 
     def log_message(self, format: str, *args: Any) -> None:
         # Keep pytest output clean unless a test fails.
         return
 
     def _origin(self) -> str:
-        host = self.headers.get('Host', '127.0.0.1')
-        scheme = 'https' if isinstance(self.connection, ssl.SSLSocket) else 'http'
-        return f'{scheme}://{host}'
-
-    def _write(self, status: int, body: str, content_type: str = 'text/html; charset=utf-8', headers: Optional[Dict[str, str]] = None) -> None:
-        payload = body.encode('utf-8')
+        host = self.headers.get("Host", "127.0.0.1")
+        scheme = "https" if isinstance(self.connection, ssl.SSLSocket) else "http"
+        return f"{scheme}://{host}"
+
+    def _write(
+        self,
+        status: int,
+        body: str,
+        content_type: str = "text/html; charset=utf-8",
+        headers: Optional[Dict[str, str]] = None,
+    ) -> None:
+        payload = body.encode("utf-8")
         self.send_response(status)
-        self.send_header('Content-Type', content_type)
-        self.send_header('Content-Length', str(len(payload)))
-        self.send_header('Connection', 'close')
+        self.send_header("Content-Type", content_type)
+        self.send_header("Content-Length", str(len(payload)))
+        self.send_header("Connection", "close")
         if headers:
             for key, value in headers.items():
                 self.send_header(key, value)
@@ -130,10 +145,10 @@ def _write(self, status: int, body: str, content_type: str = 'text/html; charset
 
     def do_GET(self) -> None:  # noqa: N802
         parsed = urllib.parse.urlparse(self.path)
-        path = parsed.path or '/'
+        path = parsed.path or "/"
         origin = self._origin()
 
-        if path in ('/', '/index.html'):
+        if path in ("/", "/index.html"):
             html = f"""<!doctype html>
 <html>
 <head>
@@ -157,35 +172,55 @@ def do_GET(self) -> None:  # noqa: N802
             self._write(200, html)
             return
 
-        if path == '/linked':
-            self._write(200, '<html><head><title>Linked Page</title></head><body><h1>Linked Page</h1></body></html>')
+        if path == "/linked":
+            self._write(
+                200,
+                "<html><head><title>Linked Page</title></head><body><h1>Linked Page</h1></body></html>",
+            )
             return
 
-        if path == '/redirect':
+        if path == "/redirect":
             self.send_response(302)
-            self.send_header('Location', '/')
-            self.send_header('Content-Length', '0')
-            self.send_header('Connection', 'close')
+            self.send_header("Location", "/")
+            self.send_header("Content-Length", "0")
+            self.send_header("Connection", "close")
             self.end_headers()
             return
 
-        if path in ('/nonexistent-page-404', '/not-found'):
-            self._write(404, '<html><head><title>Not Found</title></head><body><h1>404 Not Found</h1></body></html>')
+        if path in ("/nonexistent-page-404", "/not-found"):
+            self._write(
+                404,
+                "<html><head><title>Not Found</title></head><body><h1>404 Not Found</h1></body></html>",
+            )
             return
 
-        if path == '/static/test.txt':
-            self._write(200, 'static fixture payload', content_type='text/plain; charset=utf-8')
+        if path == "/static/test.txt":
+            self._write(
+                200, "static fixture payload", content_type="text/plain; charset=utf-8"
+            )
             return
 
-        if path == '/api/data.json':
-            self._write(200, '{"ok": true, "source": "deterministic-fixture"}', content_type='application/json')
+        if path == "/api/data.json":
+            self._write(
+                200,
+                '{"ok": true, "source": "deterministic-fixture"}',
+                content_type="application/json",
+            )
             return
 
-        self._write(404, '<html><head><title>Not Found</title></head><body><h1>404</h1></body></html>')
+        self._write(
+            404,
+            "<html><head><title>Not Found</title></head><body><h1>404</h1></body></html>",
+        )
 
 
-def _start_local_server(*, use_tls: bool = False, cert_file: Optional[Path] = None, key_file: Optional[Path] = None) -> Tuple[ThreadingHTTPServer, threading.Thread]:
-    server = ThreadingHTTPServer(('127.0.0.1', 0), _DeterministicTestRequestHandler)
+def _start_local_server(
+    *,
+    use_tls: bool = False,
+    cert_file: Optional[Path] = None,
+    key_file: Optional[Path] = None,
+) -> Tuple[ThreadingHTTPServer, threading.Thread]:
+    server = ThreadingHTTPServer(("127.0.0.1", 0), _DeterministicTestRequestHandler)
     server.daemon_threads = True
     if use_tls:
         context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
@@ -198,20 +233,43 @@ def _start_local_server(*, use_tls: bool = False, cert_file: Optional[Path] = No
 
 
 def _generate_self_signed_cert(tmpdir: Path) -> Optional[Tuple[Path, Path]]:
-    cert_file = tmpdir / 'local-test-cert.pem'
-    key_file = tmpdir / 'local-test-key.pem'
+    cert_file = tmpdir / "local-test-cert.pem"
+    key_file = tmpdir / "local-test-key.pem"
     command = [
-        'openssl', 'req', '-x509', '-newkey', 'rsa:2048', '-nodes',
-        '-days', '2', '-subj', '/CN=127.0.0.1',
-        '-addext', 'subjectAltName=DNS:localhost,IP:127.0.0.1',
-        '-keyout', str(key_file), '-out', str(cert_file),
+        "openssl",
+        "req",
+        "-x509",
+        "-newkey",
+        "rsa:2048",
+        "-nodes",
+        "-days",
+        "2",
+        "-subj",
+        "/CN=127.0.0.1",
+        "-addext",
+        "subjectAltName=DNS:localhost,IP:127.0.0.1",
+        "-keyout",
+        str(key_file),
+        "-out",
+        str(cert_file),
     ]
     result = subprocess.run(command, capture_output=True, text=True)
     if result.returncode != 0:
         fallback = [
-            'openssl', 'req', '-x509', '-newkey', 'rsa:2048', '-nodes',
-            '-days', '2', '-subj', '/CN=127.0.0.1',
-            '-keyout', str(key_file), '-out', str(cert_file),
+            "openssl",
+            "req",
+            "-x509",
+            "-newkey",
+            "rsa:2048",
+            "-nodes",
+            "-days",
+            "2",
+            "-subj",
+            "/CN=127.0.0.1",
+            "-keyout",
+            str(key_file),
+            "-out",
+            str(cert_file),
         ]
         result = subprocess.run(fallback, capture_output=True, text=True)
     if result.returncode != 0:
@@ -219,67 +277,73 @@ def _generate_self_signed_cert(tmpdir: Path) -> Optional[Tuple[Path, Path]]:
     return cert_file, key_file
 
 
-def _build_test_urls(base_url: str, https_base_url: Optional[str] = None) -> Dict[str, str]:
-    base = base_url.rstrip('/')
+def _build_test_urls(
+    base_url: str, https_base_url: Optional[str] = None
+) -> Dict[str, str]:
+    base = base_url.rstrip("/")
     urls = {
-        'base_url': f'{base}/',
-        'origin': base,
-        'redirect_url': f'{base}/redirect',
-        'not_found_url': f'{base}/nonexistent-page-404',
-        'linked_url': f'{base}/linked',
-        'static_file_url': f'{base}/static/test.txt',
-        'json_url': f'{base}/api/data.json',
+        "base_url": f"{base}/",
+        "origin": base,
+        "redirect_url": f"{base}/redirect",
+        "not_found_url": f"{base}/nonexistent-page-404",
+        "linked_url": f"{base}/linked",
+        "static_file_url": f"{base}/static/test.txt",
+        "json_url": f"{base}/api/data.json",
     }
     if https_base_url:
-        https_base = https_base_url.rstrip('/')
-        urls['https_base_url'] = f'{https_base}/'
-        urls['https_not_found_url'] = f'{https_base}/nonexistent-page-404'
+        https_base = https_base_url.rstrip("/")
+        urls["https_base_url"] = f"{https_base}/"
+        urls["https_not_found_url"] = f"{https_base}/nonexistent-page-404"
     return urls
 
 
 def _coerce_upstream_urls(value: Any) -> Optional[Dict[str, str]]:
-    if isinstance(value, str) and value.startswith(('http://', 'https://')):
+    if isinstance(value, str) and value.startswith(("http://", "https://")):
         return _build_test_urls(value)
     if not isinstance(value, dict):
         return None
 
     base_url = (
-        value.get('base_url')
-        or value.get('url')
-        or value.get('local_url')
-        or value.get('http_url')
+        value.get("base_url")
+        or value.get("url")
+        or value.get("local_url")
+        or value.get("http_url")
     )
-    if not isinstance(base_url, str) or not base_url.startswith(('http://', 'https://')):
+    if not isinstance(base_url, str) or not base_url.startswith(
+        ("http://", "https://")
+    ):
         return None
 
-    urls = _build_test_urls(base_url, value.get('https_base_url'))
+    urls = _build_test_urls(base_url, value.get("https_base_url"))
     for key, candidate in value.items():
-        if isinstance(candidate, str) and candidate.startswith(('http://', 'https://')):
+        if isinstance(candidate, str) and candidate.startswith(("http://", "https://")):
             urls[key] = candidate
     return urls
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def ensure_chromium_and_puppeteer_installed(tmp_path_factory):
     """Install Chromium and Puppeteer once for test sessions that require Chrome."""
-    if not os.environ.get('SNAP_DIR'):
-        os.environ['SNAP_DIR'] = str(tmp_path_factory.mktemp('chrome_test_data'))
-    if not os.environ.get('PERSONAS_DIR'):
-        os.environ['PERSONAS_DIR'] = str(tmp_path_factory.mktemp('chrome_test_personas'))
+    if not os.environ.get("SNAP_DIR"):
+        os.environ["SNAP_DIR"] = str(tmp_path_factory.mktemp("chrome_test_data"))
+    if not os.environ.get("PERSONAS_DIR"):
+        os.environ["PERSONAS_DIR"] = str(
+            tmp_path_factory.mktemp("chrome_test_personas")
+        )
 
     env = get_test_env()
     chromium_binary = install_chromium_with_hooks(env)
     if not chromium_binary:
-        raise RuntimeError('Chromium not found after install')
+        raise RuntimeError("Chromium not found after install")
 
-    os.environ['CHROME_BINARY'] = chromium_binary
-    for key in ('NODE_MODULES_DIR', 'NODE_PATH', 'PATH'):
+    os.environ["CHROME_BINARY"] = chromium_binary
+    for key in ("NODE_MODULES_DIR", "NODE_PATH", "PATH"):
         if env.get(key):
             os.environ[key] = env[key]
     return chromium_binary
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def chrome_test_urls(request, tmp_path_factory):
     """Provide deterministic test URLs, preferring a root conftest fixture when available."""
     for fixture_name in _ROOT_URL_FIXTURE_NAMES:
@@ -291,7 +355,7 @@ def chrome_test_urls(request, tmp_path_factory):
         if urls:
             return urls
 
-    server_tmpdir = tmp_path_factory.mktemp('chrome_test_server')
+    server_tmpdir = tmp_path_factory.mktemp("chrome_test_server")
     http_server, _http_thread = _start_local_server()
     https_server = None
     https_urls = None
@@ -299,11 +363,13 @@ def chrome_test_urls(request, tmp_path_factory):
     cert_pair = _generate_self_signed_cert(server_tmpdir)
     if cert_pair:
         cert_file, key_file = cert_pair
-        https_server, _https_thread = _start_local_server(use_tls=True, cert_file=cert_file, key_file=key_file)
-        https_urls = f'https://chrome-test.localhost:{https_server.server_port}'
+        https_server, _https_thread = _start_local_server(
+            use_tls=True, cert_file=cert_file, key_file=key_file
+        )
+        https_urls = f"https://chrome-test.localhost:{https_server.server_port}"
 
     urls = _build_test_urls(
-        f'http://chrome-test.localhost:{http_server.server_port}',
+        f"http://chrome-test.localhost:{http_server.server_port}",
         https_urls,
     )
     try:
@@ -317,16 +383,15 @@ def chrome_test_urls(request, tmp_path_factory):
             https_server.server_close()
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def chrome_test_url(chrome_test_urls):
-    return chrome_test_urls['base_url']
+    return chrome_test_urls["base_url"]
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def chrome_test_https_url(chrome_test_urls):
-    https_url = chrome_test_urls.get('https_base_url')
-    if not https_url:
-        pytest.skip('Local HTTPS fixture unavailable (openssl required)')
+    https_url = chrome_test_urls.get("https_base_url")
+    assert https_url, "Local HTTPS fixture unavailable (openssl required)"
     return https_url
 
 
@@ -336,7 +401,9 @@ def chrome_test_https_url(chrome_test_urls):
 # =============================================================================
 
 
-def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]:
+def _call_chrome_utils(
+    command: str, *args: str, env: Optional[dict] = None
+) -> Tuple[int, str, str]:
     """Call chrome_utils.js CLI command (internal helper).
 
     This is the central dispatch for calling the JS utilities from Python.
@@ -351,17 +418,40 @@ def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) ->
     Returns:
         Tuple of (returncode, stdout, stderr)
     """
-    cmd = ['node', str(CHROME_UTILS), command] + list(args)
+    cmd = ["node", str(CHROME_UTILS), command] + list(args)
     result = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        timeout=30,
-        env=env or os.environ.copy()
+        cmd, capture_output=True, text=True, timeout=30, env=env or os.environ.copy()
     )
     return result.returncode, result.stdout, result.stderr
 
 
+def wait_for_extensions_metadata(
+    chrome_dir: Path, timeout_seconds: int = 10
+) -> List[Dict[str, Any]]:
+    """Wait for extensions.json metadata via chrome_utils.js and return parsed entries."""
+    timeout_ms = max(1, int(timeout_seconds * 1000))
+    returncode, stdout, stderr = _call_chrome_utils(
+        "waitForExtensionsMetadata",
+        str(chrome_dir),
+        str(timeout_ms),
+    )
+    if returncode != 0:
+        raise AssertionError(
+            f"waitForExtensionsMetadata failed for {chrome_dir}: {stderr or stdout}"
+        )
+    try:
+        parsed = json.loads(stdout)
+    except json.JSONDecodeError as exc:
+        raise AssertionError(
+            f"Invalid JSON from waitForExtensionsMetadata: {stdout}"
+        ) from exc
+    if not isinstance(parsed, list) or not parsed:
+        raise AssertionError(
+            f"Expected non-empty extension metadata list for {chrome_dir}, got: {parsed}"
+        )
+    return parsed
+
+
 def get_plugin_dir(test_file: str) -> Path:
     """Get the plugin directory from a test file path.
 
@@ -402,20 +492,20 @@ def get_machine_type() -> str:
     Tries chrome_utils.js first, falls back to Python computation.
     """
     # Try JS first (single source of truth)
-    returncode, stdout, stderr = _call_chrome_utils('getMachineType')
+    returncode, stdout, stderr = _call_chrome_utils("getMachineType")
     if returncode == 0 and stdout.strip():
         return stdout.strip()
 
     # Fallback to Python computation
-    if os.environ.get('MACHINE_TYPE'):
-        return os.environ['MACHINE_TYPE']
+    if os.environ.get("MACHINE_TYPE"):
+        return os.environ["MACHINE_TYPE"]
 
     machine = platform.machine().lower()
     system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
+    if machine in ("arm64", "aarch64"):
+        machine = "arm64"
+    elif machine in ("x86_64", "amd64"):
+        machine = "x86_64"
     return f"{machine}-{system}"
 
 
@@ -427,14 +517,14 @@ def get_lib_dir() -> Path:
     Tries chrome_utils.js first, falls back to Python computation.
     """
     # Try JS first
-    returncode, stdout, stderr = _call_chrome_utils('getLibDir')
+    returncode, stdout, stderr = _call_chrome_utils("getLibDir")
     if returncode == 0 and stdout.strip():
         return Path(stdout.strip())
 
     # Fallback to Python
-    if os.environ.get('LIB_DIR'):
-        return Path(os.environ['LIB_DIR'])
-    return Path.home() / '.config' / 'abx' / 'lib'
+    if os.environ.get("LIB_DIR"):
+        return Path(os.environ["LIB_DIR"])
+    return Path.home() / ".config" / "abx" / "lib"
 
 
 def get_node_modules_dir() -> Path:
@@ -445,15 +535,15 @@ def get_node_modules_dir() -> Path:
     Tries chrome_utils.js first, falls back to Python computation.
     """
     # Try JS first
-    returncode, stdout, stderr = _call_chrome_utils('getNodeModulesDir')
+    returncode, stdout, stderr = _call_chrome_utils("getNodeModulesDir")
     if returncode == 0 and stdout.strip():
         return Path(stdout.strip())
 
     # Fallback to Python
-    if os.environ.get('NODE_MODULES_DIR'):
-        return Path(os.environ['NODE_MODULES_DIR'])
+    if os.environ.get("NODE_MODULES_DIR"):
+        return Path(os.environ["NODE_MODULES_DIR"])
     lib_dir = get_lib_dir()
-    return lib_dir / 'npm' / 'node_modules'
+    return lib_dir / "npm" / "node_modules"
 
 
 def get_extensions_dir() -> str:
@@ -464,16 +554,18 @@ def get_extensions_dir() -> str:
     Tries chrome_utils.js first, falls back to Python computation.
     """
     try:
-        returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
+        returncode, stdout, stderr = _call_chrome_utils("getExtensionsDir")
         if returncode == 0 and stdout.strip():
             return stdout.strip()
     except subprocess.TimeoutExpired:
         pass  # Fall through to default computation
 
     # Fallback to default computation if JS call fails
-    personas_dir = os.environ.get('PERSONAS_DIR') or str(Path.home() / '.config' / 'abx' / 'personas')
-    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
-    return str(Path(personas_dir) / persona / 'chrome_extensions')
+    personas_dir = os.environ.get("PERSONAS_DIR") or str(
+        Path.home() / ".config" / "abx" / "personas"
+    )
+    persona = os.environ.get("ACTIVE_PERSONA", "Default")
+    return str(Path(personas_dir) / persona / "chrome_extensions")
 
 
 def link_puppeteer_cache(lib_dir: Path) -> None:
@@ -482,12 +574,12 @@ def link_puppeteer_cache(lib_dir: Path) -> None:
     Avoids repeated Chromium downloads across tests by reusing the
     default Puppeteer cache directory.
     """
-    cache_dir = lib_dir / 'puppeteer'
+    cache_dir = lib_dir / "puppeteer"
     cache_dir.mkdir(parents=True, exist_ok=True)
 
     candidates = [
-        Path.home() / 'Library' / 'Caches' / 'puppeteer',
-        Path.home() / '.cache' / 'puppeteer',
+        Path.home() / "Library" / "Caches" / "puppeteer",
+        Path.home() / ".cache" / "puppeteer",
     ]
     for src_root in candidates:
         if not src_root.exists():
@@ -522,8 +614,8 @@ def find_chromium(data_dir: Optional[str] = None) -> Optional[str]:
     """
     env = os.environ.copy()
     if data_dir:
-        env['SNAP_DIR'] = str(data_dir)
-    returncode, stdout, stderr = _call_chrome_utils('findChromium', env=env)
+        env["SNAP_DIR"] = str(data_dir)
+    returncode, stdout, stderr = _call_chrome_utils("findChromium", env=env)
     if returncode == 0 and stdout.strip():
         return stdout.strip()
     return None
@@ -549,7 +641,7 @@ def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool:
     args = [str(pid)]
     if output_dir:
         args.append(str(output_dir))
-    returncode, stdout, stderr = _call_chrome_utils('killChrome', *args)
+    returncode, stdout, stderr = _call_chrome_utils("killChrome", *args)
     return returncode == 0
 
 
@@ -564,7 +656,7 @@ def get_test_env() -> dict:
     env = os.environ.copy()
 
     # Try to get all paths from JS (single source of truth)
-    returncode, stdout, stderr = _call_chrome_utils('getTestEnv')
+    returncode, stdout, stderr = _call_chrome_utils("getTestEnv")
     if returncode == 0 and stdout.strip():
         try:
             js_env = json.loads(stdout)
@@ -575,12 +667,12 @@ def get_test_env() -> dict:
 
     # Fallback to Python computation
     lib_dir = get_lib_dir()
-    env['LIB_DIR'] = str(lib_dir)
-    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
-    env['MACHINE_TYPE'] = get_machine_type()
-    env.setdefault('SNAP_DIR', str(Path.cwd()))
-    env.setdefault('CRAWL_DIR', str(Path.cwd()))
-    env.setdefault('PERSONAS_DIR', str(get_personas_dir()))
+    env["LIB_DIR"] = str(lib_dir)
+    env["NODE_MODULES_DIR"] = str(get_node_modules_dir())
+    env["MACHINE_TYPE"] = get_machine_type()
+    env.setdefault("SNAP_DIR", str(Path.cwd()))
+    env.setdefault("CRAWL_DIR", str(Path.cwd()))
+    env.setdefault("PERSONAS_DIR", str(get_personas_dir()))
     return env
 
 
@@ -619,6 +711,7 @@ def _get_node_modules_dir_cached() -> Path:
 # Usage: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
 class _LazyPath:
     """Lazy path that computes value on first access."""
+
     def __init__(self, getter):
         self._getter = getter
         self._value = None
@@ -682,14 +775,14 @@ def run_hook(
         env = get_test_env()
 
     # Determine interpreter based on file extension
-    if hook_script.suffix == '.py':
+    if hook_script.suffix == ".py":
         cmd = [sys.executable, str(hook_script)]
-    elif hook_script.suffix == '.js':
-        cmd = ['node', str(hook_script)]
+    elif hook_script.suffix == ".js":
+        cmd = ["node", str(hook_script)]
     else:
         cmd = [str(hook_script)]
 
-    cmd.extend([f'--url={url}', f'--snapshot-id={snapshot_id}'])
+    cmd.extend([f"--url={url}", f"--snapshot-id={snapshot_id}"])
     if extra_args:
         cmd.extend(extra_args)
 
@@ -699,12 +792,14 @@ def run_hook(
         capture_output=True,
         text=True,
         env=env,
-        timeout=timeout
+        timeout=timeout,
     )
     return result.returncode, result.stdout, result.stderr
 
 
-def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optional[Dict[str, Any]]:
+def parse_jsonl_output(
+    stdout: str, record_type: str = "ArchiveResult"
+) -> Optional[Dict[str, Any]]:
     """Parse JSONL output from hook stdout and return the specified record type.
 
     Usage:
@@ -719,13 +814,13 @@ def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optio
     Returns:
         The parsed JSON dict or None if not found
     """
-    for line in stdout.strip().split('\n'):
+    for line in stdout.strip().split("\n"):
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             record = json.loads(line)
-            if record.get('type') == record_type:
+            if record.get("type") == record_type:
                 return record
         except json.JSONDecodeError:
             continue
@@ -735,9 +830,9 @@ def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optio
 def parse_jsonl_records(stdout: str) -> List[Dict[str, Any]]:
     """Parse all JSONL records from stdout."""
     records: List[Dict[str, Any]] = []
-    for line in stdout.strip().split('\n'):
+    for line in stdout.strip().split("\n"):
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             records.append(json.loads(line))
@@ -749,19 +844,62 @@ def parse_jsonl_records(stdout: str) -> List[Dict[str, Any]]:
 def apply_machine_updates(records: List[Dict[str, Any]], env: dict) -> None:
     """Apply Machine update records to env dict in-place."""
     for record in records:
-        if record.get('type') != 'Machine':
+        if record.get("type") != "Machine":
             continue
-        config = record.get('config')
+        config = record.get("config")
         if not isinstance(config, dict):
             continue
         env.update(config)
 
 
-def install_chromium_with_hooks(env: dict, timeout: int = 300) -> str:
-    """Install Chromium via chrome crawl hook + puppeteer/npm hooks.
+@contextmanager
+def _chromium_install_lock(env: dict):
+    """Serialize shared Chromium/Puppeteer installs across parallel test processes."""
+    lib_dir = Path(env.get("LIB_DIR") or get_lib_dir())
+    lib_dir.mkdir(parents=True, exist_ok=True)
+    lock_path = lib_dir / ".chromium_install.lock"
+    with lock_path.open("w") as lock_file:
+        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+        try:
+            yield
+        finally:
+            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+
+
+def _resolve_existing_chromium(env: dict) -> Optional[str]:
+    """Return an existing Chromium path if already installed and valid."""
+    from_env = env.get("CHROME_BINARY")
+    if from_env and Path(from_env).exists():
+        return from_env
+    returncode, stdout, _stderr = _call_chrome_utils("findChromium", env=env)
+    if returncode == 0 and stdout.strip():
+        candidate = stdout.strip()
+        if Path(candidate).exists():
+            return candidate
+    return None
+
+
+def _has_puppeteer_module(env: dict) -> bool:
+    """Return True if Node can resolve the puppeteer package in this env."""
+    probe_env = env.copy()
+    node_modules_dir = probe_env.get("NODE_MODULES_DIR", "").strip()
+    if node_modules_dir and not probe_env.get("NODE_PATH"):
+        probe_env["NODE_PATH"] = node_modules_dir
+    result = subprocess.run(
+        ["node", "-e", "require.resolve('puppeteer')"],
+        capture_output=True,
+        text=True,
+        timeout=20,
+        env=probe_env,
+    )
+    return result.returncode == 0
+
+
+def _ensure_puppeteer_with_hooks(env: dict, timeout: int) -> None:
+    """Install puppeteer npm package using plugin hooks if not already available."""
+    if _has_puppeteer_module(env):
+        return
 
-    Returns absolute path to Chromium binary.
-    """
     puppeteer_result = subprocess.run(
         [sys.executable, str(PUPPETEER_CRAWL_HOOK)],
         capture_output=True,
@@ -770,23 +908,27 @@ def install_chromium_with_hooks(env: dict, timeout: int = 300) -> str:
         env=env,
     )
     if puppeteer_result.returncode != 0:
-        raise RuntimeError(f"Puppeteer crawl hook failed: {puppeteer_result.stderr}")
+        raise RuntimeError(
+            f"Puppeteer crawl hook failed: {puppeteer_result.stderr or puppeteer_result.stdout}"
+        )
 
-    puppeteer_record = parse_jsonl_output(puppeteer_result.stdout, record_type='Binary') or {}
-    if not puppeteer_record or puppeteer_record.get('name') != 'puppeteer':
+    puppeteer_record = (
+        parse_jsonl_output(puppeteer_result.stdout, record_type="Binary") or {}
+    )
+    if not puppeteer_record or puppeteer_record.get("name") != "puppeteer":
         raise RuntimeError("Puppeteer Binary record not emitted by crawl hook")
 
     npm_cmd = [
         sys.executable,
         str(NPM_BINARY_HOOK),
-        '--machine-id=test-machine',
-        '--binary-id=test-puppeteer',
-        '--name=puppeteer',
+        "--machine-id=test-machine",
+        "--binary-id=test-puppeteer",
+        "--name=puppeteer",
         f"--binproviders={puppeteer_record.get('binproviders', '*')}",
     ]
-    puppeteer_overrides = puppeteer_record.get('overrides')
+    puppeteer_overrides = puppeteer_record.get("overrides")
     if puppeteer_overrides:
-        npm_cmd.append(f'--overrides={json.dumps(puppeteer_overrides)}')
+        npm_cmd.append(f"--overrides={json.dumps(puppeteer_overrides)}")
 
     npm_result = subprocess.run(
         npm_cmd,
@@ -796,62 +938,96 @@ def install_chromium_with_hooks(env: dict, timeout: int = 300) -> str:
         env=env,
     )
     if npm_result.returncode != 0:
-        raise RuntimeError(f"Npm install failed: {npm_result.stderr}")
+        raise RuntimeError(
+            f"Npm puppeteer install failed:\nstdout: {npm_result.stdout}\nstderr: {npm_result.stderr}"
+        )
 
     apply_machine_updates(parse_jsonl_records(npm_result.stdout), env)
+    if env.get("NODE_MODULES_DIR") and not env.get("NODE_PATH"):
+        env["NODE_PATH"] = env["NODE_MODULES_DIR"]
 
-    chrome_result = subprocess.run(
-        [sys.executable, str(CHROME_INSTALL_HOOK)],
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if chrome_result.returncode != 0:
-        raise RuntimeError(f"Chrome install hook failed: {chrome_result.stderr}")
+    if not _has_puppeteer_module(env):
+        raise RuntimeError(
+            "Puppeteer install hook completed but require.resolve('puppeteer') still fails"
+        )
 
-    chrome_record = parse_jsonl_output(chrome_result.stdout, record_type='Binary') or {}
-    if not chrome_record or chrome_record.get('name') not in ('chromium', 'chrome'):
-        raise RuntimeError("Chrome Binary record not emitted by crawl hook")
 
-    chromium_cmd = [
-        sys.executable,
-        str(PUPPETEER_BINARY_HOOK),
-        '--machine-id=test-machine',
-        '--binary-id=test-chromium',
-        f"--name={chrome_record.get('name', 'chromium')}",
-        f"--binproviders={chrome_record.get('binproviders', '*')}",
-    ]
-    chrome_overrides = chrome_record.get('overrides')
-    if chrome_overrides:
-        chromium_cmd.append(f'--overrides={json.dumps(chrome_overrides)}')
+def install_chromium_with_hooks(env: dict, timeout: int = 300) -> str:
+    """Install Chromium via chrome crawl hook + puppeteer/npm hooks.
 
-    result = subprocess.run(
-        chromium_cmd,
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if result.returncode != 0:
-        raise RuntimeError(f"Puppeteer chromium install failed: {result.stderr}")
+    Returns absolute path to Chromium binary.
+    """
+    with _chromium_install_lock(env):
+        # Always ensure JS dependency exists, even if Chromium already exists
+        # on the host. chrome_launch requires `require('puppeteer')`.
+        _ensure_puppeteer_with_hooks(env, timeout=timeout)
+
+        existing = _resolve_existing_chromium(env)
+        if existing:
+            env["CHROME_BINARY"] = existing
+            return existing
+
+        chrome_result = subprocess.run(
+            [sys.executable, str(CHROME_INSTALL_HOOK)],
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            env=env,
+        )
+        if chrome_result.returncode != 0:
+            raise RuntimeError(f"Chrome install hook failed: {chrome_result.stderr}")
 
-    records = parse_jsonl_records(result.stdout)
-    chromium_record = None
-    for record in records:
-        if record.get('type') == 'Binary' and record.get('name') in ('chromium', 'chrome'):
-            chromium_record = record
-            break
-    if not chromium_record:
-        chromium_record = parse_jsonl_output(result.stdout, record_type='Binary')
+        chrome_record = (
+            parse_jsonl_output(chrome_result.stdout, record_type="Binary") or {}
+        )
+        if not chrome_record or chrome_record.get("name") not in ("chromium", "chrome"):
+            raise RuntimeError("Chrome Binary record not emitted by crawl hook")
+
+        chromium_cmd = [
+            sys.executable,
+            str(PUPPETEER_BINARY_HOOK),
+            "--machine-id=test-machine",
+            "--binary-id=test-chromium",
+            f"--name={chrome_record.get('name', 'chromium')}",
+            f"--binproviders={chrome_record.get('binproviders', '*')}",
+        ]
+        chrome_overrides = chrome_record.get("overrides")
+        if chrome_overrides:
+            chromium_cmd.append(f"--overrides={json.dumps(chrome_overrides)}")
 
-    chromium_path = chromium_record.get('abspath')
-    if not chromium_path or not Path(chromium_path).exists():
-        raise RuntimeError(f"Chromium binary not found after install: {chromium_path}")
+        result = subprocess.run(
+            chromium_cmd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            env=env,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(f"Puppeteer chromium install failed: {result.stderr}")
+
+        records = parse_jsonl_records(result.stdout)
+        chromium_record = None
+        for record in records:
+            if record.get("type") == "Binary" and record.get("name") in (
+                "chromium",
+                "chrome",
+            ):
+                chromium_record = record
+                break
+        if not chromium_record:
+            chromium_record = parse_jsonl_output(result.stdout, record_type="Binary")
+        if not chromium_record:
+            raise RuntimeError("Chromium Binary record not found after install")
+
+        chromium_path = chromium_record.get("abspath")
+        if not isinstance(chromium_path, str) or not Path(chromium_path).exists():
+            raise RuntimeError(
+                f"Chromium binary not found after install: {chromium_path}"
+            )
 
-    env['CHROME_BINARY'] = chromium_path
-    apply_machine_updates(records, env)
-    return chromium_path
+        env["CHROME_BINARY"] = chromium_path
+        apply_machine_updates(records, env)
+        return chromium_path
 
 
 def run_hook_and_parse(
@@ -871,8 +1047,13 @@ def run_hook_and_parse(
         Tuple of (returncode, parsed_result_or_none, stderr)
     """
     returncode, stdout, stderr = run_hook(
-        hook_script, url, snapshot_id,
-        cwd=cwd, env=env, timeout=timeout, extra_args=extra_args
+        hook_script,
+        url,
+        snapshot_id,
+        cwd=cwd,
+        env=env,
+        timeout=timeout,
+        extra_args=extra_args,
     )
     result = parse_jsonl_output(stdout)
     return returncode, result, stderr
@@ -906,48 +1087,50 @@ def setup_test_env(tmpdir: Path) -> dict:
     # Determine machine type (matches archivebox.config.paths.get_machine_type())
     machine = platform.machine().lower()
     system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
+    if machine in ("arm64", "aarch64"):
+        machine = "arm64"
+    elif machine in ("x86_64", "amd64"):
+        machine = "x86_64"
     machine_type = f"{machine}-{system}"
 
     tmpdir = Path(tmpdir).resolve()
 
     # Keep crawl/snap state rooted in the caller's tmpdir so every test is isolated.
-    snap_dir = tmpdir / 'snap'
+    snap_dir = tmpdir / "snap"
     lib_dir = get_lib_dir()
-    npm_dir = lib_dir / 'npm'
-    npm_bin_dir = npm_dir / '.bin'
-    node_modules_dir = npm_dir / 'node_modules'
+    npm_dir = lib_dir / "npm"
+    npm_bin_dir = npm_dir / ".bin"
+    node_modules_dir = npm_dir / "node_modules"
 
     personas_dir = get_personas_dir()
-    chrome_extensions_dir = personas_dir / 'Default' / 'chrome_extensions'
+    chrome_extensions_dir = personas_dir / "Default" / "chrome_extensions"
 
     # Create all directories
     node_modules_dir.mkdir(parents=True, exist_ok=True)
     npm_bin_dir.mkdir(parents=True, exist_ok=True)
     chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
     snap_dir.mkdir(parents=True, exist_ok=True)
-    crawl_dir = tmpdir / 'crawl'
+    crawl_dir = tmpdir / "crawl"
     crawl_dir.mkdir(parents=True, exist_ok=True)
 
     # Build complete env dict
     env = os.environ.copy()
-    env.update({
-        'SNAP_DIR': str(snap_dir),
-        'CRAWL_DIR': str(crawl_dir),
-        'PERSONAS_DIR': str(personas_dir),
-        'LIB_DIR': str(lib_dir),
-        'MACHINE_TYPE': machine_type,
-        'NPM_BIN_DIR': str(npm_bin_dir),
-        'NODE_MODULES_DIR': str(node_modules_dir),
-        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
-    })
+    env.update(
+        {
+            "SNAP_DIR": str(snap_dir),
+            "CRAWL_DIR": str(crawl_dir),
+            "PERSONAS_DIR": str(personas_dir),
+            "LIB_DIR": str(lib_dir),
+            "MACHINE_TYPE": machine_type,
+            "NPM_BIN_DIR": str(npm_bin_dir),
+            "NODE_MODULES_DIR": str(node_modules_dir),
+            "CHROME_EXTENSIONS_DIR": str(chrome_extensions_dir),
+        }
+    )
 
     # Only set headless if not already in environment (allow override for debugging)
-    if 'CHROME_HEADLESS' not in os.environ:
-        env['CHROME_HEADLESS'] = 'true'
+    if "CHROME_HEADLESS" not in os.environ:
+        env["CHROME_HEADLESS"] = "true"
 
     try:
         install_chromium_with_hooks(env)
@@ -956,7 +1139,9 @@ def setup_test_env(tmpdir: Path) -> dict:
     return env
 
 
-def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple[subprocess.Popen, str]:
+def launch_chromium_session(
+    env: dict, chrome_dir: Path, crawl_id: str, timeout: int = 30
+) -> Tuple[subprocess.Popen, str]:
     """Launch Chromium and return (process, cdp_url).
 
     This launches Chrome using the chrome launch hook and waits for the CDP URL
@@ -966,6 +1151,7 @@ def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple
         env: Environment dict (from setup_test_env)
         chrome_dir: Directory for Chrome to write its files (cdp_url.txt, chrome.pid, etc.)
         crawl_id: ID for the crawl
+        timeout: Maximum seconds to wait for cdp_url.txt
 
     Returns:
         Tuple of (chrome_launch_process, cdp_url)
@@ -980,25 +1166,27 @@ def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple
 
     # chrome_launch always writes to <CRAWL_DIR>/chrome, so force env/cwd to match.
     launch_env = env.copy()
-    launch_env['CRAWL_DIR'] = str(crawl_dir)
-    env['CRAWL_DIR'] = str(crawl_dir)
+    launch_env["CRAWL_DIR"] = str(crawl_dir)
+    env["CRAWL_DIR"] = str(crawl_dir)
 
     chrome_launch_process = subprocess.Popen(
-        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
+        ["node", str(CHROME_LAUNCH_HOOK), f"--crawl-id={crawl_id}"],
         cwd=str(chrome_dir),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         text=True,
-        env=launch_env
+        env=launch_env,
     )
 
     # Wait for Chromium to launch and CDP URL to be available
     cdp_url = None
-    for _ in range(30):
+    for _ in range(timeout):
         if chrome_launch_process.poll() is not None:
             stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-        cdp_file = chrome_dir / 'cdp_url.txt'
+            raise RuntimeError(
+                f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}"
+            )
+        cdp_file = chrome_dir / "cdp_url.txt"
         if cdp_file.exists():
             cdp_url = cdp_file.read_text().strip()
             if cdp_url:
@@ -1007,12 +1195,14 @@ def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple
 
     if not cdp_url:
         chrome_launch_process.kill()
-        raise RuntimeError("Chromium CDP URL not found after 30s")
+        raise RuntimeError(f"Chromium CDP URL not found after {timeout}s")
 
     return chrome_launch_process, cdp_url
 
 
-def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
+def kill_chromium_session(
+    chrome_launch_process: subprocess.Popen, chrome_dir: Path
+) -> None:
     """Clean up Chromium process launched by launch_chromium_session.
 
     Uses chrome_utils.js killChrome for proper process group handling.
@@ -1029,7 +1219,7 @@ def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: P
         pass
 
     # Read PID and use JS to kill with proper cleanup
-    chrome_pid_file = chrome_dir / 'chrome.pid'
+    chrome_pid_file = chrome_dir / "chrome.pid"
     if chrome_pid_file.exists():
         try:
             chrome_pid = int(chrome_pid_file.read_text().strip())
@@ -1058,7 +1248,9 @@ def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
     """
     chrome_launch_process = None
     try:
-        chrome_launch_process, cdp_url = launch_chromium_session(env, chrome_dir, crawl_id)
+        chrome_launch_process, cdp_url = launch_chromium_session(
+            env, chrome_dir, crawl_id
+        )
         yield chrome_launch_process, cdp_url
     finally:
         if chrome_launch_process:
@@ -1071,7 +1263,11 @@ def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
 # =============================================================================
 
 
-def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chrome_dir: Optional[Path] = None) -> None:
+def cleanup_chrome(
+    chrome_launch_process: subprocess.Popen,
+    chrome_pid: int,
+    chrome_dir: Optional[Path] = None,
+) -> None:
     """Clean up Chrome processes using chrome_utils.js killChrome.
 
     Uses the centralized kill logic from chrome_utils.js which handles:
@@ -1098,9 +1294,9 @@ def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chr
 @contextmanager
 def chrome_session(
     tmpdir: Path,
-    crawl_id: str = 'test-crawl',
-    snapshot_id: str = 'test-snapshot',
-    test_url: str = 'about:blank',
+    crawl_id: str = "test-crawl",
+    snapshot_id: str = "test-snapshot",
+    test_url: str = "about:blank",
     navigate: bool = True,
     timeout: int = 15,
 ):
@@ -1137,96 +1333,121 @@ def chrome_session(
         # Create proper directory structure in tmpdir
         machine = platform.machine().lower()
         system = platform.system().lower()
-        if machine in ('arm64', 'aarch64'):
-            machine = 'arm64'
-        elif machine in ('x86_64', 'amd64'):
-            machine = 'x86_64'
+        if machine in ("arm64", "aarch64"):
+            machine = "arm64"
+        elif machine in ("x86_64", "amd64"):
+            machine = "x86_64"
         machine_type = f"{machine}-{system}"
 
         tmpdir = Path(tmpdir).resolve()
         # Model real runtime layout: one crawl root + one snapshot root per session.
-        crawl_dir = tmpdir / 'crawl' / crawl_id
-        snap_dir = tmpdir / 'snap' / snapshot_id
+        crawl_dir = tmpdir / "crawl" / crawl_id
+        snap_dir = tmpdir / "snap" / snapshot_id
         personas_dir = get_personas_dir()
-        lib_dir = get_lib_dir()
-        npm_dir = lib_dir / 'npm'
-        node_modules_dir = npm_dir / 'node_modules'
-        puppeteer_cache_dir = lib_dir / 'puppeteer'
+        env = os.environ.copy()
+
+        # Prefer an already-provisioned NODE_MODULES_DIR (set by session-level chrome fixture)
+        # so we don't force per-test reinstall under tmp LIB_DIR paths.
+        existing_node_modules = env.get("NODE_MODULES_DIR")
+        if existing_node_modules and Path(existing_node_modules).exists():
+            node_modules_dir = Path(existing_node_modules).resolve()
+            npm_dir = node_modules_dir.parent
+            lib_dir = npm_dir.parent
+        else:
+            lib_dir = get_lib_dir()
+            npm_dir = lib_dir / "npm"
+            node_modules_dir = npm_dir / "node_modules"
+        puppeteer_cache_dir = lib_dir / "puppeteer"
 
         # Create lib structure for puppeteer installation
         node_modules_dir.mkdir(parents=True, exist_ok=True)
 
         # Create crawl and snapshot directories
         crawl_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir(parents=True, exist_ok=True)
 
         # Build env with tmpdir-specific paths
-        env = os.environ.copy()
         snap_dir.mkdir(parents=True, exist_ok=True)
         personas_dir.mkdir(parents=True, exist_ok=True)
 
-        env.update({
-            'SNAP_DIR': str(snap_dir),
-            'CRAWL_DIR': str(crawl_dir),
-            'PERSONAS_DIR': str(personas_dir),
-            'LIB_DIR': str(lib_dir),
-            'MACHINE_TYPE': machine_type,
-            'NODE_MODULES_DIR': str(node_modules_dir),
-            'NODE_PATH': str(node_modules_dir),
-            'NPM_BIN_DIR': str(npm_dir / '.bin'),
-            'CHROME_HEADLESS': 'true',
-            'PUPPETEER_CACHE_DIR': str(puppeteer_cache_dir),
-        })
+        env.update(
+            {
+                "SNAP_DIR": str(snap_dir),
+                "CRAWL_DIR": str(crawl_dir),
+                "PERSONAS_DIR": str(personas_dir),
+                "LIB_DIR": str(lib_dir),
+                "MACHINE_TYPE": machine_type,
+                "NODE_MODULES_DIR": str(node_modules_dir),
+                "NODE_PATH": str(node_modules_dir),
+                "NPM_BIN_DIR": str(npm_dir / ".bin"),
+                "CHROME_HEADLESS": "true",
+                "PUPPETEER_CACHE_DIR": str(puppeteer_cache_dir),
+            }
+        )
 
         # Reuse system Puppeteer cache to avoid redundant Chromium downloads
         link_puppeteer_cache(lib_dir)
 
-        # Install Chromium via npm + puppeteer hooks using normal Binary flow
-        install_chromium_with_hooks(env)
+        # Reuse already-provisioned Chromium when available (session fixture sets CHROME_BINARY).
+        # Falling back to hook-based install on each test is slow and can hang on flaky networks.
+        chrome_binary = env.get("CHROME_BINARY")
+        if not chrome_binary or not Path(chrome_binary).exists():
+            chrome_binary = install_chromium_with_hooks(env)
+            env["CHROME_BINARY"] = chrome_binary
 
         # Launch Chrome at crawl level
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
+            ["node", str(CHROME_LAUNCH_HOOK), f"--crawl-id={crawl_id}"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=env
+            env=env,
         )
 
         # Wait for Chrome launch state files from the crawl-level session.
         for i in range(timeout):
             if chrome_launch_process.poll() is not None:
                 stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-            if (chrome_dir / 'cdp_url.txt').exists() and (chrome_dir / 'chrome.pid').exists():
+                raise RuntimeError(
+                    f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}"
+                )
+            if (chrome_dir / "cdp_url.txt").exists() and (
+                chrome_dir / "chrome.pid"
+            ).exists():
                 break
             time.sleep(1)
 
-        if not (chrome_dir / 'cdp_url.txt').exists():
+        if not (chrome_dir / "cdp_url.txt").exists():
             raise RuntimeError(f"Chrome CDP URL not found after {timeout}s")
 
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
 
         # Create snapshot directory structure
         snap_dir.mkdir(parents=True, exist_ok=True)
-        snapshot_chrome_dir = snap_dir / 'chrome'
+        snapshot_chrome_dir = snap_dir / "chrome"
         snapshot_chrome_dir.mkdir(parents=True, exist_ok=True)
 
         # Create tab. We explicitly pin both CRAWL_DIR and SNAP_DIR so hook state
         # files land in this session's isolated tmp tree.
         tab_env = env.copy()
-        tab_env['CRAWL_DIR'] = str(crawl_dir)
-        tab_env['SNAP_DIR'] = str(snap_dir)
+        tab_env["CRAWL_DIR"] = str(crawl_dir)
+        tab_env["SNAP_DIR"] = str(snap_dir)
         try:
             result = subprocess.run(
-                ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
+                [
+                    "node",
+                    str(CHROME_TAB_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                    f"--crawl-id={crawl_id}",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=60,
-                env=tab_env
+                env=tab_env,
             )
             if result.returncode != 0:
                 cleanup_chrome(chrome_launch_process, chrome_pid)
@@ -1236,18 +1457,25 @@ def chrome_session(
             raise RuntimeError("Tab creation timed out after 60s")
 
         # Navigate to URL if requested
-        if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
+        if navigate and CHROME_NAVIGATE_HOOK and test_url != "about:blank":
             try:
                 result = subprocess.run(
-                    ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(CHROME_NAVIGATE_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(snapshot_chrome_dir),
                     capture_output=True,
                     text=True,
                     timeout=120,
-                    env=tab_env
+                    env=tab_env,
                 )
                 if result.returncode != 0:
-                    cleanup_chrome(chrome_launch_process, chrome_pid, chrome_dir=chrome_dir)
+                    cleanup_chrome(
+                        chrome_launch_process, chrome_pid, chrome_dir=chrome_dir
+                    )
                     raise RuntimeError(f"Navigation failed: {result.stderr}")
             except subprocess.TimeoutExpired:
                 cleanup_chrome(chrome_launch_process, chrome_pid, chrome_dir=chrome_dir)
diff --git a/abx_plugins/plugins/chrome/tests/test_chrome.py b/abx_plugins/plugins/chrome/tests/test_chrome.py
index 314eb37..4c73af2 100644
--- a/abx_plugins/plugins/chrome/tests/test_chrome.py
+++ b/abx_plugins/plugins/chrome/tests/test_chrome.py
@@ -20,98 +20,35 @@
 import os
 import signal
 import subprocess
-import sys
 import time
 from pathlib import Path
 import pytest
+
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
 import tempfile
 
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
     find_chromium_binary,
-    ensure_chromium_and_puppeteer_installed,
-    chrome_test_url,
-    chrome_test_urls,
-    CHROME_PLUGIN_DIR as PLUGIN_DIR,
     CHROME_LAUNCH_HOOK,
     CHROME_TAB_HOOK,
     CHROME_NAVIGATE_HOOK,
+    CHROME_UTILS,
 )
 
-def _get_cookies_via_cdp(port: int, env: dict) -> list[dict]:
-    node_script = r"""
-const http = require('http');
-const WebSocket = require('ws');
-const port = process.env.CDP_PORT;
-
-function getTargets() {
-  return new Promise((resolve, reject) => {
-    const req = http.get(`http://chrome-cdp.localhost:${port}/json/list`, (res) => {
-      let data = '';
-      res.on('data', (chunk) => (data += chunk));
-      res.on('end', () => {
-        try {
-          resolve(JSON.parse(data));
-        } catch (e) {
-          reject(e);
-        }
-      });
-    });
-    req.on('error', reject);
-  });
-}
-
-(async () => {
-  const targets = await getTargets();
-  const pageTarget = targets.find(t => t.type === 'page') || targets[0];
-  if (!pageTarget) {
-    console.error('No page target found');
-    process.exit(2);
-  }
-
-  const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
-  const timer = setTimeout(() => {
-    console.error('Timeout waiting for cookies');
-    process.exit(3);
-  }, 10000);
-
-  ws.on('open', () => {
-    ws.send(JSON.stringify({ id: 1, method: 'Network.getAllCookies' }));
-  });
-
-  ws.on('message', (data) => {
-    const msg = JSON.parse(data);
-    if (msg.id === 1) {
-      clearTimeout(timer);
-      ws.close();
-      if (!msg.result || !msg.result.cookies) {
-        console.error('No cookies in response');
-        process.exit(4);
-      }
-      process.stdout.write(JSON.stringify(msg.result.cookies));
-      process.exit(0);
-    }
-  });
-
-  ws.on('error', (err) => {
-    console.error(String(err));
-    process.exit(5);
-  });
-})().catch((err) => {
-  console.error(String(err));
-  process.exit(1);
-});
-"""
 
+def _get_cookies_via_cdp(port: int, env: dict) -> list[dict]:
     result = subprocess.run(
-        ['node', '-e', node_script],
+        ["node", str(CHROME_UTILS), "getCookiesViaCdp", str(port)],
         capture_output=True,
         text=True,
         timeout=30,
-        env=env | {'CDP_PORT': str(port)},
+        env=env,
     )
-    assert result.returncode == 0, f"Failed to read cookies via CDP: {result.stderr}\nStdout: {result.stdout}"
-    return json.loads(result.stdout or '[]')
+    assert result.returncode == 0, (
+        f"Failed to read cookies via CDP: {result.stderr}\nStdout: {result.stdout}"
+    )
+    return json.loads(result.stdout or "[]")
 
 
 @pytest.fixture(scope="session", autouse=True)
@@ -129,57 +66,65 @@ def test_hook_scripts_exist():
 
 def test_verify_chromium_available():
     """Verify Chromium is available via CHROME_BINARY env var."""
-    chromium_binary = os.environ.get('CHROME_BINARY') or find_chromium_binary()
+    chromium_binary = os.environ.get("CHROME_BINARY") or find_chromium_binary()
 
-    assert chromium_binary, "Chromium binary should be available (set by fixture or found)"
-    assert Path(chromium_binary).exists(), f"Chromium binary should exist at {chromium_binary}"
+    assert chromium_binary, (
+        "Chromium binary should be available (set by fixture or found)"
+    )
+    assert Path(chromium_binary).exists(), (
+        f"Chromium binary should exist at {chromium_binary}"
+    )
 
     # Verify it's actually Chromium by checking version
     result = subprocess.run(
-        [chromium_binary, '--version'],
-        capture_output=True,
-        text=True,
-        timeout=10
+        [chromium_binary, "--version"], capture_output=True, text=True, timeout=10
     )
     assert result.returncode == 0, f"Failed to get Chromium version: {result.stderr}"
-    assert 'Chromium' in result.stdout or 'Chrome' in result.stdout, f"Unexpected version output: {result.stdout}"
+    assert "Chromium" in result.stdout or "Chrome" in result.stdout, (
+        f"Unexpected version output: {result.stdout}"
+    )
 
 
 def test_chrome_launch_and_tab_creation(chrome_test_url):
     """Integration test: Launch Chrome at crawl level and create tab at snapshot level."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
         # Get test environment with NODE_MODULES_DIR set
         env = get_test_env()
-        env['CHROME_HEADLESS'] = 'true'
+        env["CHROME_HEADLESS"] = "true"
         # chrome_launch writes to <CRAWL_DIR>/chrome, not cwd.
-        env['CRAWL_DIR'] = str(crawl_dir)
+        env["CRAWL_DIR"] = str(crawl_dir)
 
         # Launch Chrome at crawl level (background process)
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-123'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-crawl-123"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=env
+            env=env,
         )
 
-        # Wait for Chrome to launch (check process isn't dead and files exist)
-        for i in range(15):  # Wait up to 15 seconds for Chrome to start
+        # Wait for Chrome to launch (check process isn't dead and files exist).
+        # launchChromium() itself waits up to 30s for CDP readiness, so allow
+        # additional headroom here to avoid CI false negatives on cold runners.
+        launch_wait_seconds = 45
+        for i in range(launch_wait_seconds):
             if chrome_launch_process.poll() is not None:
                 stdout, stderr = chrome_launch_process.communicate()
-                pytest.fail(f"Chrome launch process exited early:\nStdout: {stdout}\nStderr: {stderr}")
-            if (chrome_dir / 'cdp_url.txt').exists():
+                pytest.fail(
+                    f"Chrome launch process exited early:\nStdout: {stdout}\nStderr: {stderr}"
+                )
+            if (chrome_dir / "cdp_url.txt").exists():
                 break
             time.sleep(1)
 
         # Verify Chrome launch outputs - if it failed, get the error from the process
-        if not (chrome_dir / 'cdp_url.txt').exists():
+        if not (chrome_dir / "cdp_url.txt").exists():
             # Try to get output from the process
             try:
                 stdout, stderr = chrome_launch_process.communicate(timeout=1)
@@ -191,27 +136,35 @@ def test_chrome_launch_and_tab_creation(chrome_test_url):
             if chrome_dir.exists():
                 files = list(chrome_dir.iterdir())
                 # Check if Chrome process is still alive
-                if (chrome_dir / 'chrome.pid').exists():
-                    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+                if (chrome_dir / "chrome.pid").exists():
+                    chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
                     try:
                         os.kill(chrome_pid, 0)
                         chrome_alive = "yes"
                     except OSError:
                         chrome_alive = "no"
-                    pytest.fail(f"cdp_url.txt missing after 15s. Chrome dir files: {files}. Chrome process {chrome_pid} alive: {chrome_alive}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
+                    pytest.fail(
+                        f"cdp_url.txt missing after {launch_wait_seconds}s. Chrome dir files: {files}. Chrome process {chrome_pid} alive: {chrome_alive}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}"
+                    )
                 else:
-                    pytest.fail(f"cdp_url.txt missing. Chrome dir exists with files: {files}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
+                    pytest.fail(
+                        f"cdp_url.txt missing. Chrome dir exists with files: {files}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}"
+                    )
             else:
-                pytest.fail(f"Chrome dir {chrome_dir} doesn't exist\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
+                pytest.fail(
+                    f"Chrome dir {chrome_dir} doesn't exist\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}"
+                )
 
-        assert (chrome_dir / 'cdp_url.txt').exists(), "cdp_url.txt should exist"
-        assert (chrome_dir / 'chrome.pid').exists(), "chrome.pid should exist"
-        assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
+        assert (chrome_dir / "cdp_url.txt").exists(), "cdp_url.txt should exist"
+        assert (chrome_dir / "chrome.pid").exists(), "chrome.pid should exist"
+        assert (chrome_dir / "port.txt").exists(), "port.txt should exist"
 
-        cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+        cdp_url = (chrome_dir / "cdp_url.txt").read_text().strip()
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
 
-        assert cdp_url.startswith('ws://'), f"CDP URL should be WebSocket URL: {cdp_url}"
+        assert cdp_url.startswith("ws://"), (
+            f"CDP URL should be WebSocket URL: {cdp_url}"
+        )
         assert chrome_pid > 0, "Chrome PID should be valid"
 
         # Verify Chrome process is running
@@ -221,38 +174,50 @@ def test_chrome_launch_and_tab_creation(chrome_test_url):
             pytest.fail(f"Chrome process {chrome_pid} is not running")
 
         # Create snapshot directory and tab
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
+        snapshot_dir = Path(tmpdir) / "snapshot1"
         snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
+        snapshot_chrome_dir = snapshot_dir / "chrome"
         snapshot_chrome_dir.mkdir()
 
         # Launch tab at snapshot level
-        env['CRAWL_DIR'] = str(crawl_dir)
-        env['SNAP_DIR'] = str(snapshot_dir)
+        env["CRAWL_DIR"] = str(crawl_dir)
+        env["SNAP_DIR"] = str(snapshot_dir)
         result = subprocess.run(
-            ['node', str(CHROME_TAB_HOOK), f'--url={chrome_test_url}', '--snapshot-id=snap-123', '--crawl-id=test-crawl-123'],
+            [
+                "node",
+                str(CHROME_TAB_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=snap-123",
+                "--crawl-id=test-crawl-123",
+            ],
             cwd=str(snapshot_chrome_dir),
             capture_output=True,
             text=True,
             timeout=60,
-            env=env
+            env=env,
         )
 
-        assert result.returncode == 0, f"Tab creation failed: {result.stderr}\nStdout: {result.stdout}"
+        assert result.returncode == 0, (
+            f"Tab creation failed: {result.stderr}\nStdout: {result.stdout}"
+        )
 
         # Verify tab creation outputs
-        assert (snapshot_chrome_dir / 'cdp_url.txt').exists(), "Snapshot cdp_url.txt should exist"
-        assert (snapshot_chrome_dir / 'target_id.txt').exists(), "target_id.txt should exist"
-        assert (snapshot_chrome_dir / 'url.txt').exists(), "url.txt should exist"
+        assert (snapshot_chrome_dir / "cdp_url.txt").exists(), (
+            "Snapshot cdp_url.txt should exist"
+        )
+        assert (snapshot_chrome_dir / "target_id.txt").exists(), (
+            "target_id.txt should exist"
+        )
+        assert (snapshot_chrome_dir / "url.txt").exists(), "url.txt should exist"
 
-        target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
+        target_id = (snapshot_chrome_dir / "target_id.txt").read_text().strip()
         assert len(target_id) > 0, "Target ID should not be empty"
 
         # Cleanup: Kill Chrome and launch process
         try:
             chrome_launch_process.send_signal(signal.SIGTERM)
             chrome_launch_process.wait(timeout=5)
-        except:
+        except Exception:
             pass
         try:
             os.kill(chrome_pid, signal.SIGKILL)
@@ -263,55 +228,59 @@ def test_chrome_launch_and_tab_creation(chrome_test_url):
 def test_cookies_imported_on_launch():
     """Integration test: COOKIES_TXT_FILE is imported at crawl start."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        cookies_file = Path(tmpdir) / 'cookies.txt'
+        cookies_file = Path(tmpdir) / "cookies.txt"
         cookies_file.write_text(
-            '\n'.join([
-                '# Netscape HTTP Cookie File',
-                '# https://curl.se/docs/http-cookies.html',
-                '# This file was generated by a test',
-                '',
-                'example.com\tTRUE\t/\tFALSE\t2147483647\tabx_test_cookie\thello',
-                '',
-            ])
+            "\n".join(
+                [
+                    "# Netscape HTTP Cookie File",
+                    "# https://curl.se/docs/http-cookies.html",
+                    "# This file was generated by a test",
+                    "",
+                    "example.com\tTRUE\t/\tFALSE\t2147483647\tabx_test_cookie\thello",
+                    "",
+                ]
+            )
         )
 
-        profile_dir = Path(tmpdir) / 'profile'
+        profile_dir = Path(tmpdir) / "profile"
         env = get_test_env()
-        env.update({
-            'CHROME_HEADLESS': 'true',
-            'CHROME_USER_DATA_DIR': str(profile_dir),
-            'COOKIES_TXT_FILE': str(cookies_file),
-            'CRAWL_DIR': str(crawl_dir),
-        })
+        env.update(
+            {
+                "CHROME_HEADLESS": "true",
+                "CHROME_USER_DATA_DIR": str(profile_dir),
+                "COOKIES_TXT_FILE": str(cookies_file),
+                "CRAWL_DIR": str(crawl_dir),
+            }
+        )
 
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-cookies'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-crawl-cookies"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=env
+            env=env,
         )
 
         for _ in range(15):
-            if (chrome_dir / 'port.txt').exists():
+            if (chrome_dir / "port.txt").exists():
                 break
             time.sleep(1)
 
-        assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        port = int((chrome_dir / 'port.txt').read_text().strip())
+        assert (chrome_dir / "port.txt").exists(), "port.txt should exist"
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
+        port = int((chrome_dir / "port.txt").read_text().strip())
 
         cookie_found = False
         for _ in range(15):
             cookies = _get_cookies_via_cdp(port, env)
             cookie_found = any(
-                c.get('name') == 'abx_test_cookie' and c.get('value') == 'hello'
+                c.get("name") == "abx_test_cookie" and c.get("value") == "hello"
                 for c in cookies
             )
             if cookie_found:
@@ -324,7 +293,7 @@ def test_cookies_imported_on_launch():
         try:
             chrome_launch_process.send_signal(signal.SIGTERM)
             chrome_launch_process.wait(timeout=5)
-        except:
+        except Exception:
             pass
         try:
             os.kill(chrome_pid, signal.SIGKILL)
@@ -335,78 +304,100 @@ def test_cookies_imported_on_launch():
 def test_chrome_navigation(chrome_test_url):
     """Integration test: Navigate to a URL."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        launch_env = get_test_env() | {'CRAWL_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
+        launch_env = get_test_env() | {
+            "CRAWL_DIR": str(crawl_dir),
+            "CHROME_HEADLESS": "true",
+        }
         # Launch Chrome (background process)
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-nav'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-crawl-nav"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=launch_env
+            env=launch_env,
         )
 
         # Wait for Chrome to launch
         time.sleep(3)
 
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
 
         # Create snapshot and tab
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
+        snapshot_dir = Path(tmpdir) / "snapshot1"
         snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
+        snapshot_chrome_dir = snapshot_dir / "chrome"
         snapshot_chrome_dir.mkdir()
 
         tab_env = get_test_env() | {
-            'CRAWL_DIR': str(crawl_dir),
-            'SNAP_DIR': str(snapshot_dir),
-            'CHROME_HEADLESS': 'true',
+            "CRAWL_DIR": str(crawl_dir),
+            "SNAP_DIR": str(snapshot_dir),
+            "CHROME_HEADLESS": "true",
         }
         result = subprocess.run(
-            ['node', str(CHROME_TAB_HOOK), f'--url={chrome_test_url}', '--snapshot-id=snap-nav-123', '--crawl-id=test-crawl-nav'],
+            [
+                "node",
+                str(CHROME_TAB_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=snap-nav-123",
+                "--crawl-id=test-crawl-nav",
+            ],
             cwd=str(snapshot_chrome_dir),
             capture_output=True,
             text=True,
             timeout=60,
-            env=tab_env
+            env=tab_env,
         )
         assert result.returncode == 0, f"Tab creation failed: {result.stderr}"
 
         # Navigate to URL
         nav_env = get_test_env() | {
-            'SNAP_DIR': str(snapshot_dir),
-            'CHROME_PAGELOAD_TIMEOUT': '30',
-            'CHROME_WAIT_FOR': 'load',
+            "SNAP_DIR": str(snapshot_dir),
+            "CHROME_PAGELOAD_TIMEOUT": "30",
+            "CHROME_WAIT_FOR": "load",
         }
         result = subprocess.run(
-            ['node', str(CHROME_NAVIGATE_HOOK), f'--url={chrome_test_url}', '--snapshot-id=snap-nav-123'],
+            [
+                "node",
+                str(CHROME_NAVIGATE_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=snap-nav-123",
+            ],
             cwd=str(snapshot_chrome_dir),
             capture_output=True,
             text=True,
             timeout=120,
-            env=nav_env
+            env=nav_env,
         )
 
-        assert result.returncode == 0, f"Navigation failed: {result.stderr}\nStdout: {result.stdout}"
+        assert result.returncode == 0, (
+            f"Navigation failed: {result.stderr}\nStdout: {result.stdout}"
+        )
 
         # Verify navigation outputs
-        assert (snapshot_chrome_dir / 'navigation.json').exists(), "navigation.json should exist"
-        assert (snapshot_chrome_dir / 'page_loaded.txt').exists(), "page_loaded.txt should exist"
+        assert (snapshot_chrome_dir / "navigation.json").exists(), (
+            "navigation.json should exist"
+        )
+        assert (snapshot_chrome_dir / "page_loaded.txt").exists(), (
+            "page_loaded.txt should exist"
+        )
 
-        nav_data = json.loads((snapshot_chrome_dir / 'navigation.json').read_text())
-        assert nav_data.get('status') in [200, 301, 302], f"Should get valid HTTP status: {nav_data}"
-        assert nav_data.get('finalUrl'), "Should have final URL"
+        nav_data = json.loads((snapshot_chrome_dir / "navigation.json").read_text())
+        assert nav_data.get("status") in [200, 301, 302], (
+            f"Should get valid HTTP status: {nav_data}"
+        )
+        assert nav_data.get("finalUrl"), "Should have final URL"
 
         # Cleanup
         try:
             chrome_launch_process.send_signal(signal.SIGTERM)
             chrome_launch_process.wait(timeout=5)
-        except:
+        except Exception:
             pass
         try:
             os.kill(chrome_pid, signal.SIGKILL)
@@ -417,45 +408,54 @@ def test_chrome_navigation(chrome_test_url):
 def test_tab_cleanup_on_sigterm(chrome_test_url):
     """Integration test: Tab cleanup when receiving SIGTERM."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        launch_env = get_test_env() | {'CRAWL_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
+        launch_env = get_test_env() | {
+            "CRAWL_DIR": str(crawl_dir),
+            "CHROME_HEADLESS": "true",
+        }
         # Launch Chrome (background process)
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-cleanup'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-cleanup"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=launch_env
+            env=launch_env,
         )
 
         # Wait for Chrome to launch
         time.sleep(3)
 
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
 
         # Create snapshot and tab - run in background
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
+        snapshot_dir = Path(tmpdir) / "snapshot1"
         snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
+        snapshot_chrome_dir = snapshot_dir / "chrome"
         snapshot_chrome_dir.mkdir()
 
         tab_env = get_test_env() | {
-            'CRAWL_DIR': str(crawl_dir),
-            'SNAP_DIR': str(snapshot_dir),
-            'CHROME_HEADLESS': 'true',
+            "CRAWL_DIR": str(crawl_dir),
+            "SNAP_DIR": str(snapshot_dir),
+            "CHROME_HEADLESS": "true",
         }
         tab_process = subprocess.Popen(
-            ['node', str(CHROME_TAB_HOOK), f'--url={chrome_test_url}', '--snapshot-id=snap-cleanup', '--crawl-id=test-cleanup'],
+            [
+                "node",
+                str(CHROME_TAB_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=snap-cleanup",
+                "--crawl-id=test-cleanup",
+            ],
             cwd=str(snapshot_chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=tab_env
+            env=tab_env,
         )
 
         # Wait for tab to be created
@@ -477,7 +477,7 @@ def test_tab_cleanup_on_sigterm(chrome_test_url):
         try:
             chrome_launch_process.send_signal(signal.SIGTERM)
             chrome_launch_process.wait(timeout=5)
-        except:
+        except Exception:
             pass
         try:
             os.kill(chrome_pid, signal.SIGKILL)
@@ -488,77 +488,94 @@ def test_tab_cleanup_on_sigterm(chrome_test_url):
 def test_multiple_snapshots_share_chrome(chrome_test_urls):
     """Integration test: Multiple snapshots share one Chrome instance."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        launch_env = get_test_env() | {'CRAWL_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
+        launch_env = get_test_env() | {
+            "CRAWL_DIR": str(crawl_dir),
+            "CHROME_HEADLESS": "true",
+        }
         # Launch Chrome at crawl level
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-multi-crawl'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-multi-crawl"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=launch_env
+            env=launch_env,
         )
 
         # Wait for Chrome to launch
         for i in range(15):
-            if (chrome_dir / 'cdp_url.txt').exists():
+            if (chrome_dir / "cdp_url.txt").exists():
                 break
             time.sleep(1)
 
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        crawl_cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
+        crawl_cdp_url = (chrome_dir / "cdp_url.txt").read_text().strip()
 
         # Create multiple snapshots that share this Chrome
         snapshot_dirs = []
         target_ids = []
 
         for snap_num in range(3):
-            snapshot_dir = Path(tmpdir) / f'snapshot{snap_num}'
+            snapshot_dir = Path(tmpdir) / f"snapshot{snap_num}"
             snapshot_dir.mkdir()
-            snapshot_chrome_dir = snapshot_dir / 'chrome'
+            snapshot_chrome_dir = snapshot_dir / "chrome"
             snapshot_chrome_dir.mkdir()
             snapshot_dirs.append(snapshot_chrome_dir)
 
             # Create tab for this snapshot
             tab_url = f"{chrome_test_urls['origin']}/snapshot-{snap_num}"
             tab_env = get_test_env() | {
-                'CRAWL_DIR': str(crawl_dir),
-                'SNAP_DIR': str(snapshot_dir),
-                'CHROME_HEADLESS': 'true',
+                "CRAWL_DIR": str(crawl_dir),
+                "SNAP_DIR": str(snapshot_dir),
+                "CHROME_HEADLESS": "true",
             }
             result = subprocess.run(
-                ['node', str(CHROME_TAB_HOOK), f'--url={tab_url}', f'--snapshot-id=snap-{snap_num}', '--crawl-id=test-multi-crawl'],
+                [
+                    "node",
+                    str(CHROME_TAB_HOOK),
+                    f"--url={tab_url}",
+                    f"--snapshot-id=snap-{snap_num}",
+                    "--crawl-id=test-multi-crawl",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=60,
-                env=tab_env
+                env=tab_env,
             )
 
-            assert result.returncode == 0, f"Tab {snap_num} creation failed: {result.stderr}"
+            assert result.returncode == 0, (
+                f"Tab {snap_num} creation failed: {result.stderr}"
+            )
 
             # Verify each snapshot has its own target_id but same Chrome PID
-            assert (snapshot_chrome_dir / 'target_id.txt').exists()
-            assert (snapshot_chrome_dir / 'cdp_url.txt').exists()
-            assert (snapshot_chrome_dir / 'chrome.pid').exists()
+            assert (snapshot_chrome_dir / "target_id.txt").exists()
+            assert (snapshot_chrome_dir / "cdp_url.txt").exists()
+            assert (snapshot_chrome_dir / "chrome.pid").exists()
 
-            target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
-            snapshot_cdp_url = (snapshot_chrome_dir / 'cdp_url.txt').read_text().strip()
-            snapshot_pid = int((snapshot_chrome_dir / 'chrome.pid').read_text().strip())
+            target_id = (snapshot_chrome_dir / "target_id.txt").read_text().strip()
+            snapshot_cdp_url = (snapshot_chrome_dir / "cdp_url.txt").read_text().strip()
+            snapshot_pid = int((snapshot_chrome_dir / "chrome.pid").read_text().strip())
 
             target_ids.append(target_id)
 
             # All snapshots should share same Chrome
-            assert snapshot_pid == chrome_pid, f"Snapshot {snap_num} should use crawl Chrome PID"
-            assert snapshot_cdp_url == crawl_cdp_url, f"Snapshot {snap_num} should use crawl CDP URL"
+            assert snapshot_pid == chrome_pid, (
+                f"Snapshot {snap_num} should use crawl Chrome PID"
+            )
+            assert snapshot_cdp_url == crawl_cdp_url, (
+                f"Snapshot {snap_num} should use crawl CDP URL"
+            )
 
         # All target IDs should be unique (different tabs)
-        assert len(set(target_ids)) == 3, f"All snapshots should have unique tabs: {target_ids}"
+        assert len(set(target_ids)) == 3, (
+            f"All snapshots should have unique tabs: {target_ids}"
+        )
 
         # Chrome should still be running with all 3 tabs
         try:
@@ -570,7 +587,7 @@ def test_multiple_snapshots_share_chrome(chrome_test_urls):
         try:
             chrome_launch_process.send_signal(signal.SIGTERM)
             chrome_launch_process.wait(timeout=5)
-        except:
+        except Exception:
             pass
         try:
             os.kill(chrome_pid, signal.SIGKILL)
@@ -581,28 +598,41 @@ def test_multiple_snapshots_share_chrome(chrome_test_urls):
 def test_chrome_cleanup_on_crawl_end():
     """Integration test: Chrome cleanup at end of crawl."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        launch_env = get_test_env() | {'CRAWL_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
+        launch_env = get_test_env() | {
+            "CRAWL_DIR": str(crawl_dir),
+            "CHROME_HEADLESS": "true",
+        }
         # Launch Chrome in background
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-end'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-crawl-end"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=launch_env
+            env=launch_env,
         )
 
-        # Wait for Chrome to launch
-        time.sleep(3)
+        # Wait for Chrome launch state files and fail fast on early hook exit.
+        for _ in range(15):
+            if chrome_launch_process.poll() is not None:
+                stdout, stderr = chrome_launch_process.communicate()
+                pytest.fail(
+                    f"Chrome launch process exited early:\nStdout: {stdout}\nStderr: {stderr}"
+                )
+            if (chrome_dir / "cdp_url.txt").exists() and (
+                chrome_dir / "chrome.pid"
+            ).exists():
+                break
+            time.sleep(1)
 
         # Verify Chrome is running
-        assert (chrome_dir / 'chrome.pid').exists(), "Chrome PID file should exist"
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
+        assert (chrome_dir / "chrome.pid").exists(), "Chrome PID file should exist"
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
 
         try:
             os.kill(chrome_pid, 0)
@@ -628,32 +658,37 @@ def test_chrome_cleanup_on_crawl_end():
 def test_zombie_prevention_hook_killed():
     """Integration test: Chrome is killed even if hook process is SIGKILL'd."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
+        crawl_dir = Path(tmpdir) / "crawl"
         crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir()
 
-        launch_env = get_test_env() | {'CRAWL_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
+        launch_env = get_test_env() | {
+            "CRAWL_DIR": str(crawl_dir),
+            "CHROME_HEADLESS": "true",
+        }
         # Launch Chrome
         chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-zombie'],
+            ["node", str(CHROME_LAUNCH_HOOK), "--crawl-id=test-zombie"],
             cwd=str(chrome_dir),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
-            env=launch_env
+            env=launch_env,
         )
 
         # Wait for Chrome to launch
         for i in range(15):
-            if (chrome_dir / 'chrome.pid').exists():
+            if (chrome_dir / "chrome.pid").exists():
                 break
             time.sleep(1)
 
-        assert (chrome_dir / 'chrome.pid').exists(), "Chrome PID file should exist"
+        assert (chrome_dir / "chrome.pid").exists(), "Chrome PID file should exist"
 
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        hook_pid = chrome_launch_process.pid  # Use the Popen process PID instead of hook.pid file
+        chrome_pid = int((chrome_dir / "chrome.pid").read_text().strip())
+        hook_pid = (
+            chrome_launch_process.pid
+        )  # Use the Popen process PID instead of hook.pid file
 
         # Verify both Chrome and hook are running
         try:
@@ -681,7 +716,7 @@ def is_process_alive(pid):
             except (OSError, ProcessLookupError):
                 return False
 
-        for pid_file in chrome_dir.glob('**/*.pid'):
+        for pid_file in chrome_dir.glob("**/*.pid"):
             try:
                 pid = int(pid_file.read_text().strip())
 
@@ -732,5 +767,5 @@ def is_process_alive(pid):
             pass
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/chrome/tests/test_chrome_test_helpers.py b/abx_plugins/plugins/chrome/tests/test_chrome_test_helpers.py
index fd5f9fe..b8ad190 100644
--- a/abx_plugins/plugins/chrome/tests/test_chrome_test_helpers.py
+++ b/abx_plugins/plugins/chrome/tests/test_chrome_test_helpers.py
@@ -19,6 +19,7 @@
     get_plugin_dir,
     get_hook_script,
     parse_jsonl_output,
+    install_chromium_with_hooks,
 )
 
 
@@ -26,75 +27,79 @@ def test_get_machine_type():
     """Test get_machine_type() returns valid format."""
     machine_type = get_machine_type()
     assert isinstance(machine_type, str)
-    assert '-' in machine_type, "Machine type should be in format: arch-os"
+    assert "-" in machine_type, "Machine type should be in format: arch-os"
     # Should be one of the expected formats
-    assert any(x in machine_type for x in ['arm64', 'x86_64']), "Should contain valid architecture"
-    assert any(x in machine_type for x in ['darwin', 'linux', 'win32']), "Should contain valid OS"
+    assert any(x in machine_type for x in ["arm64", "x86_64"]), (
+        "Should contain valid architecture"
+    )
+    assert any(x in machine_type for x in ["darwin", "linux", "win32"]), (
+        "Should contain valid OS"
+    )
 
 
 def test_get_lib_dir_with_env_var():
     """Test get_lib_dir() respects LIB_DIR env var."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        custom_lib = Path(tmpdir) / 'custom_lib'
+        custom_lib = Path(tmpdir) / "custom_lib"
         custom_lib.mkdir()
 
-        old_lib_dir = os.environ.get('LIB_DIR')
+        old_lib_dir = os.environ.get("LIB_DIR")
         try:
-            os.environ['LIB_DIR'] = str(custom_lib)
+            os.environ["LIB_DIR"] = str(custom_lib)
             lib_dir = get_lib_dir()
             assert lib_dir == custom_lib
         finally:
             if old_lib_dir:
-                os.environ['LIB_DIR'] = old_lib_dir
+                os.environ["LIB_DIR"] = old_lib_dir
             else:
-                os.environ.pop('LIB_DIR', None)
+                os.environ.pop("LIB_DIR", None)
 
 
 def test_get_node_modules_dir_with_env_var():
     """Test get_node_modules_dir() respects NODE_MODULES_DIR env var."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        custom_nm = Path(tmpdir) / 'node_modules'
+        custom_nm = Path(tmpdir) / "node_modules"
         custom_nm.mkdir()
 
-        old_nm_dir = os.environ.get('NODE_MODULES_DIR')
+        old_nm_dir = os.environ.get("NODE_MODULES_DIR")
         try:
-            os.environ['NODE_MODULES_DIR'] = str(custom_nm)
+            os.environ["NODE_MODULES_DIR"] = str(custom_nm)
             nm_dir = get_node_modules_dir()
             assert nm_dir == custom_nm
         finally:
             if old_nm_dir:
-                os.environ['NODE_MODULES_DIR'] = old_nm_dir
+                os.environ["NODE_MODULES_DIR"] = old_nm_dir
             else:
-                os.environ.pop('NODE_MODULES_DIR', None)
+                os.environ.pop("NODE_MODULES_DIR", None)
 
 
 def test_get_extensions_dir_default():
     """Test get_extensions_dir() returns expected path format."""
     ext_dir = get_extensions_dir()
     assert isinstance(ext_dir, str)
-    assert 'personas' in ext_dir
-    assert 'chrome_extensions' in ext_dir
+    assert "personas" in ext_dir
+    assert "chrome_extensions" in ext_dir
 
 
 def test_get_extensions_dir_with_custom_persona():
     """Test get_extensions_dir() respects ACTIVE_PERSONA env var."""
-    old_persona = os.environ.get('ACTIVE_PERSONA')
-    old_personas_dir = os.environ.get('PERSONAS_DIR')
+    old_persona = os.environ.get("ACTIVE_PERSONA")
+    old_personas_dir = os.environ.get("PERSONAS_DIR")
     try:
-        os.environ['ACTIVE_PERSONA'] = 'TestPersona'
-        os.environ['PERSONAS_DIR'] = '/tmp/test-personas'
+        os.environ["ACTIVE_PERSONA"] = "TestPersona"
+        os.environ["PERSONAS_DIR"] = "/tmp/test-personas"
         ext_dir = get_extensions_dir()
-        assert 'TestPersona' in ext_dir
-        assert '/tmp/test-personas' in ext_dir
+        assert "TestPersona" in ext_dir
+        assert "/tmp/test-personas" in ext_dir
     finally:
         if old_persona:
-            os.environ['ACTIVE_PERSONA'] = old_persona
+            os.environ["ACTIVE_PERSONA"] = old_persona
         else:
-            os.environ.pop('ACTIVE_PERSONA', None)
+            os.environ.pop("ACTIVE_PERSONA", None)
         if old_personas_dir:
-            os.environ['PERSONAS_DIR'] = old_personas_dir
+            os.environ["PERSONAS_DIR"] = old_personas_dir
         else:
-            os.environ.pop('PERSONAS_DIR', None)
+            os.environ.pop("PERSONAS_DIR", None)
 
 
 def test_get_test_env_returns_dict():
@@ -103,15 +108,15 @@ def test_get_test_env_returns_dict():
     assert isinstance(env, dict)
 
     # Should include key paths
-    assert 'MACHINE_TYPE' in env
-    assert 'LIB_DIR' in env
-    assert 'NODE_MODULES_DIR' in env
-    assert 'NODE_PATH' in env  # Critical for module resolution
-    assert 'NPM_BIN_DIR' in env
-    assert 'CHROME_EXTENSIONS_DIR' in env
+    assert "MACHINE_TYPE" in env
+    assert "LIB_DIR" in env
+    assert "NODE_MODULES_DIR" in env
+    assert "NODE_PATH" in env  # Critical for module resolution
+    assert "NPM_BIN_DIR" in env
+    assert "CHROME_EXTENSIONS_DIR" in env
 
     # Verify NODE_PATH equals NODE_MODULES_DIR (for Node.js module resolution)
-    assert env['NODE_PATH'] == env['NODE_MODULES_DIR']
+    assert env["NODE_PATH"] == env["NODE_MODULES_DIR"]
 
 
 def test_get_test_env_paths_are_absolute():
@@ -119,9 +124,9 @@ def test_get_test_env_paths_are_absolute():
     env = get_test_env()
 
     # All path-like values should be absolute
-    assert Path(env['LIB_DIR']).is_absolute()
-    assert Path(env['NODE_MODULES_DIR']).is_absolute()
-    assert Path(env['NODE_PATH']).is_absolute()
+    assert Path(env["LIB_DIR"]).is_absolute()
+    assert Path(env["NODE_MODULES_DIR"]).is_absolute()
+    assert Path(env["NODE_PATH"]).is_absolute()
 
 
 def test_find_chromium_binary():
@@ -142,8 +147,8 @@ def test_get_plugin_dir():
     assert plugin_dir.exists()
     assert plugin_dir.is_dir()
     # Should be the chrome plugin directory
-    assert plugin_dir.name == 'chrome'
-    assert (plugin_dir.parent.name == 'plugins')
+    assert plugin_dir.name == "chrome"
+    assert plugin_dir.parent.name == "plugins"
 
 
 def test_get_hook_script_finds_existing_hook():
@@ -151,81 +156,81 @@ def test_get_hook_script_finds_existing_hook():
     from abx_plugins.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
 
     # Try to find the chrome launch hook
-    hook = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
+    hook = get_hook_script(CHROME_PLUGIN_DIR, "on_Crawl__*_chrome_launch.*")
 
     if hook:  # May not exist in all test environments
         assert hook.exists()
         assert hook.is_file()
-        assert 'chrome_launch' in hook.name
+        assert "chrome_launch" in hook.name
 
 
 def test_get_hook_script_returns_none_for_missing():
     """Test get_hook_script() returns None for non-existent hooks."""
     from abx_plugins.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
 
-    hook = get_hook_script(CHROME_PLUGIN_DIR, 'nonexistent_hook_*_pattern.*')
+    hook = get_hook_script(CHROME_PLUGIN_DIR, "nonexistent_hook_*_pattern.*")
     assert hook is None
 
 
 def test_parse_jsonl_output_valid():
     """Test parse_jsonl_output() parses valid JSONL."""
-    jsonl_output = '''{"type": "ArchiveResult", "status": "succeeded", "output": "test1"}
+    jsonl_output = """{"type": "ArchiveResult", "status": "succeeded", "output": "test1"}
 {"type": "ArchiveResult", "status": "failed", "error": "test2"}
-'''
+"""
 
     # Returns first match only
     result = parse_jsonl_output(jsonl_output)
     assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['status'] == 'succeeded'
-    assert result['output'] == 'test1'
+    assert result["type"] == "ArchiveResult"
+    assert result["status"] == "succeeded"
+    assert result["output"] == "test1"
 
 
 def test_parse_jsonl_output_with_non_json_lines():
     """Test parse_jsonl_output() skips non-JSON lines."""
-    mixed_output = '''Some non-JSON output
+    mixed_output = """Some non-JSON output
 {"type": "ArchiveResult", "status": "succeeded"}
 More non-JSON
 {"type": "ArchiveResult", "status": "failed"}
-'''
+"""
 
     result = parse_jsonl_output(mixed_output)
     assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['status'] == 'succeeded'
+    assert result["type"] == "ArchiveResult"
+    assert result["status"] == "succeeded"
 
 
 def test_parse_jsonl_output_empty():
     """Test parse_jsonl_output() handles empty input."""
-    result = parse_jsonl_output('')
+    result = parse_jsonl_output("")
     assert result is None
 
 
 def test_parse_jsonl_output_filters_by_type():
     """Test parse_jsonl_output() can filter by record type."""
-    jsonl_output = '''{"type": "LogEntry", "data": "log1"}
+    jsonl_output = """{"type": "LogEntry", "data": "log1"}
 {"type": "ArchiveResult", "data": "result1"}
 {"type": "ArchiveResult", "data": "result2"}
-'''
+"""
 
     # Should return first ArchiveResult, not LogEntry
-    result = parse_jsonl_output(jsonl_output, record_type='ArchiveResult')
+    result = parse_jsonl_output(jsonl_output, record_type="ArchiveResult")
     assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['data'] == 'result1'  # First ArchiveResult
+    assert result["type"] == "ArchiveResult"
+    assert result["data"] == "result1"  # First ArchiveResult
 
 
 def test_parse_jsonl_output_filters_custom_type():
     """Test parse_jsonl_output() can filter by custom record type."""
-    jsonl_output = '''{"type": "ArchiveResult", "data": "result1"}
+    jsonl_output = """{"type": "ArchiveResult", "data": "result1"}
 {"type": "LogEntry", "data": "log1"}
 {"type": "ArchiveResult", "data": "result2"}
-'''
+"""
 
-    result = parse_jsonl_output(jsonl_output, record_type='LogEntry')
+    result = parse_jsonl_output(jsonl_output, record_type="LogEntry")
     assert result is not None
-    assert result['type'] == 'LogEntry'
-    assert result['data'] == 'log1'
+    assert result["type"] == "LogEntry"
+    assert result["data"] == "log1"
 
 
 def test_machine_type_consistency():
@@ -238,20 +243,51 @@ def test_machine_type_consistency():
 def test_lib_dir_is_directory():
     """Test that lib_dir points to an actual directory when HOME is set."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        old_home = os.environ.get('HOME')
+        old_home = os.environ.get("HOME")
         try:
-            os.environ['HOME'] = tmpdir
-            lib_dir = Path(tmpdir) / '.config' / 'abx' / 'lib'
+            os.environ["HOME"] = tmpdir
+            lib_dir = Path(tmpdir) / ".config" / "abx" / "lib"
             lib_dir.mkdir(parents=True, exist_ok=True)
 
             result = get_lib_dir()
             assert isinstance(result, Path)
         finally:
             if old_home:
-                os.environ['HOME'] = old_home
+                os.environ["HOME"] = old_home
             else:
-                os.environ.pop('HOME', None)
+                os.environ.pop("HOME", None)
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+def test_install_chromium_with_hooks_reuses_existing_chromium_via_env(tmp_path: Path):
+    """Use public env inputs only: existing CHROME_BINARY should be reused."""
+    chromium_path = tmp_path / "chromium"
+    chromium_path.write_text("#!/bin/sh\nexit 0\n")
+    chromium_path.chmod(0o755)
+
+    # Provide a minimal local puppeteer package so require.resolve('puppeteer')
+    # succeeds without network installs.
+    node_modules_dir = tmp_path / "lib" / "npm" / "node_modules"
+    puppeteer_dir = node_modules_dir / "puppeteer"
+    puppeteer_dir.mkdir(parents=True, exist_ok=True)
+    (puppeteer_dir / "package.json").write_text(
+        '{"name":"puppeteer","version":"0.0.0","main":"index.js"}\n'
+    )
+    (puppeteer_dir / "index.js").write_text("module.exports = {};\n")
+
+    env = get_test_env()
+    env.update(
+        {
+            "CHROME_BINARY": str(chromium_path),
+            "LIB_DIR": str(tmp_path / "lib"),
+            "NODE_MODULES_DIR": str(node_modules_dir),
+            "NODE_PATH": str(node_modules_dir),
+        }
+    )
+    resolved = install_chromium_with_hooks(env, timeout=1)
+
+    assert resolved == str(chromium_path)
+    assert env["CHROME_BINARY"] == str(chromium_path)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/consolelog/tests/test_consolelog.py b/abx_plugins/plugins/consolelog/tests/test_consolelog.py
index 1dc0d55..c71f967 100644
--- a/abx_plugins/plugins/consolelog/tests/test_consolelog.py
+++ b/abx_plugins/plugins/consolelog/tests/test_consolelog.py
@@ -13,6 +13,8 @@
 from pathlib import Path
 
 import pytest
+
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     CHROME_NAVIGATE_HOOK,
@@ -23,7 +25,7 @@
 
 # Get the path to the consolelog hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-CONSOLELOG_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_consolelog.*')
+CONSOLELOG_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_consolelog.*")
 
 
 class TestConsolelogPlugin:
@@ -31,7 +33,9 @@ class TestConsolelogPlugin:
 
     def test_consolelog_hook_exists(self):
         """Consolelog hook script should exist."""
-        assert CONSOLELOG_HOOK is not None, "Consolelog hook not found in plugin directory"
+        assert CONSOLELOG_HOOK is not None, (
+            "Consolelog hook not found in plugin directory"
+        )
         assert CONSOLELOG_HOOK.exists(), f"Hook not found: {CONSOLELOG_HOOK}"
 
 
@@ -48,42 +52,54 @@ def teardown_method(self, _method=None):
 
     def test_consolelog_captures_output(self):
         """Consolelog hook should capture console output from page."""
-        test_url = 'data:text/html,<script>console.log("archivebox-console-test")</script>'
-        snapshot_id = 'test-consolelog-snapshot'
+        test_url = (
+            'data:text/html,<script>console.log("archivebox-console-test")</script>'
+        )
+        snapshot_id = "test-consolelog-snapshot"
 
         with chrome_session(
             self.temp_dir,
-            crawl_id='test-consolelog-crawl',
+            crawl_id="test-consolelog-crawl",
             snapshot_id=snapshot_id,
             test_url=test_url,
             navigate=False,
             timeout=30,
         ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            console_dir = snapshot_chrome_dir.parent / 'consolelog'
+            console_dir = snapshot_chrome_dir.parent / "consolelog"
             console_dir.mkdir(exist_ok=True)
 
             # Run consolelog hook with the active Chrome session (background hook)
             result = subprocess.Popen(
-                ['node', str(CONSOLELOG_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(CONSOLELOG_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(console_dir),
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-                env=env
+                env=env,
             )
 
             nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(CHROME_NAVIGATE_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
             # Check for output file
-            console_output = console_dir / 'console.jsonl'
+            console_output = console_dir / "console.jsonl"
 
             # Allow it to run briefly, then terminate (background hook)
             for _ in range(10):
@@ -101,23 +117,23 @@ def test_consolelog_captures_output(self):
                 stdout, stderr = result.communicate()
 
             # At minimum, verify no crash
-            assert 'Traceback' not in stderr
+            assert "Traceback" not in stderr
 
             # If output file exists, verify it's valid JSONL and has output
             if console_output.exists():
                 with open(console_output) as f:
                     content = f.read().strip()
                     assert content, "Console output should not be empty"
-                    for line in content.split('\n'):
+                    for line in content.split("\n"):
                         if line.strip():
                             try:
                                 record = json.loads(line)
                                 # Verify structure
-                                assert 'timestamp' in record
-                                assert 'type' in record
+                                assert "timestamp" in record
+                                assert "type" in record
                             except json.JSONDecodeError:
                                 pass  # Some lines may be incomplete
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/custom/on_Binary__14_custom_install.py b/abx_plugins/plugins/custom/on_Binary__14_custom_install.py
index f0395bd..332105e 100755
--- a/abx_plugins/plugins/custom/on_Binary__14_custom_install.py
+++ b/abx_plugins/plugins/custom/on_Binary__14_custom_install.py
@@ -14,7 +14,6 @@
 #     ./on_Binary__14_custom_install.py [...] > events.jsonl
 
 import json
-import os
 import subprocess
 import sys
 
@@ -23,15 +22,17 @@
 
 
 @click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', required=True, help="Custom bash command to run")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str):
+@click.option("--binary-id", required=True, help="Binary UUID")
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--custom-cmd", required=True, help="Custom bash command to run")
+def main(
+    binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str
+):
     """Install binary using custom bash command."""
 
-    if binproviders != '*' and 'custom' not in binproviders.split(','):
+    if binproviders != "*" and "custom" not in binproviders.split(","):
         click.echo(f"custom provider not allowed for {name}", err=True)
         sys.exit(0)
 
@@ -63,7 +64,7 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
             binary = Binary(
                 name=name,
                 binproviders=[provider],
-                overrides={'env': {'version': '0.0.1'}},
+                overrides={"env": {"version": "0.0.1"}},
             ).load()
         except Exception as e:
             click.echo(f"{name} not found after custom install: {e}", err=True)
@@ -73,18 +74,16 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         click.echo(f"{name} not found after custom install", err=True)
         sys.exit(1)
 
-    machine_id = os.environ.get('MACHINE_ID', '')
-
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'custom',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": "custom",
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
@@ -95,5 +94,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/custom/tests/test_custom_provider.py b/abx_plugins/plugins/custom/tests/test_custom_provider.py
index 982b7b2..4fc3333 100644
--- a/abx_plugins/plugins/custom/tests/test_custom_provider.py
+++ b/abx_plugins/plugins/custom/tests/test_custom_provider.py
@@ -16,7 +16,7 @@
 
 # Get the path to the custom provider hook
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_custom_install.py'), None)
+INSTALL_HOOK = next(PLUGIN_DIR.glob("on_Binary__*_custom_install.py"), None)
 
 
 class TestCustomProviderHook:
@@ -29,6 +29,7 @@ def setup_method(self, _method=None):
     def teardown_method(self, _method=None):
         """Clean up."""
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_hook_script_exists(self):
@@ -38,60 +39,62 @@ def test_hook_script_exists(self):
     def test_hook_skips_when_custom_not_allowed(self):
         """Hook should skip when custom not in allowed binproviders."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # custom not allowed
-                '--custom-cmd=echo hello',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=echo",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                "--binproviders=pip,apt",  # custom not allowed
+                "--custom-cmd=echo hello",
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should exit cleanly (code 0) when custom not allowed
         assert result.returncode == 0
-        assert 'custom provider not allowed' in result.stderr
+        assert "custom provider not allowed" in result.stderr
 
     def test_hook_runs_custom_command_and_finds_binary(self):
         """Hook should run custom command and find the binary in PATH."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         # Use a simple echo command that doesn't actually install anything
         # Then check for 'echo' which is already in PATH
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=echo",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
                 '--custom-cmd=echo "custom install simulation"',
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should succeed since echo is in PATH
         assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
         # Parse JSONL output
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'echo':
-                        assert record['binprovider'] == 'custom'
-                        assert record['abspath']
+                    if record.get("type") == "Binary" and record.get("name") == "echo":
+                        assert record["binprovider"] == "custom"
+                        assert record["abspath"]
                         return
                 except json.JSONDecodeError:
                     continue
@@ -101,48 +104,50 @@ def test_hook_runs_custom_command_and_finds_binary(self):
     def test_hook_fails_for_missing_binary_after_command(self):
         """Hook should fail if binary not found after running custom command."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_binary_xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=nonexistent_binary_xyz123",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
                 '--custom-cmd=echo "failed install"',  # Doesn't actually install
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should fail since binary not found after command
         assert result.returncode == 1
-        assert 'not found' in result.stderr.lower()
+        assert "not found" in result.stderr.lower()
 
     def test_hook_fails_for_failing_command(self):
         """Hook should fail if custom command returns non-zero exit code."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--custom-cmd=exit 1',  # Command that fails
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=echo",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                "--custom-cmd=exit 1",  # Command that fails
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should fail with exit code 1
         assert result.returncode == 1
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/dns/tests/conftest.py b/abx_plugins/plugins/dns/tests/conftest.py
new file mode 100644
index 0000000..87b3198
--- /dev/null
+++ b/abx_plugins/plugins/dns/tests/conftest.py
@@ -0,0 +1,12 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def require_chrome_runtime():
+    """Require chrome runtime prerequisites for integration tests."""
+    from abx_pkg import NpmProvider
+
+    try:
+        NpmProvider()
+    except Exception as exc:
+        pytest.fail(f"Chrome integration prerequisites unavailable: {exc}")
diff --git a/abx_plugins/plugins/dns/tests/test_dns.py b/abx_plugins/plugins/dns/tests/test_dns.py
index 8a8dabc..4a6db0e 100644
--- a/abx_plugins/plugins/dns/tests/test_dns.py
+++ b/abx_plugins/plugins/dns/tests/test_dns.py
@@ -10,22 +10,23 @@
 import subprocess
 import tempfile
 import time
-from urllib.parse import urlparse
 from pathlib import Path
 
 import pytest
+
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
 )
 
 
 # Get the path to the DNS hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-DNS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dns.*')
+DNS_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_dns.*")
+TEST_URL = "https://example.com"
 
 
 class TestDNSPlugin:
@@ -48,42 +49,52 @@ def teardown_method(self, _method=None):
         """Clean up."""
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
-    def test_dns_records_captured(self, chrome_test_url):
+    def test_dns_records_captured(self, require_chrome_runtime):
         """DNS hook should capture DNS records from a real URL."""
-        test_url = chrome_test_url
-        snapshot_id = 'test-dns-snapshot'
+        test_url = TEST_URL
+        snapshot_id = "test-dns-snapshot"
 
         with chrome_session(
             self.temp_dir,
-            crawl_id='test-dns-crawl',
+            crawl_id="test-dns-crawl",
             snapshot_id=snapshot_id,
             test_url=test_url,
             navigate=False,
             timeout=30,
         ) as (_process, _pid, snapshot_chrome_dir, env):
-            dns_dir = snapshot_chrome_dir.parent / 'dns'
+            dns_dir = snapshot_chrome_dir.parent / "dns"
             dns_dir.mkdir(exist_ok=True)
 
             result = subprocess.Popen(
-                ['node', str(DNS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(DNS_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(dns_dir),
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-                env=env
+                env=env,
             )
 
             nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(CHROME_NAVIGATE_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
-            dns_output = dns_dir / 'dns.jsonl'
+            dns_output = dns_dir / "dns.jsonl"
             for _ in range(30):
                 if dns_output.exists() and dns_output.stat().st_size > 0:
                     break
@@ -99,21 +110,14 @@ def test_dns_records_captured(self, chrome_test_url):
             else:
                 stdout, stderr = result.communicate()
 
-            assert 'Traceback' not in stderr
+            assert "Traceback" not in stderr
 
             assert dns_output.exists(), "dns.jsonl not created"
             content = dns_output.read_text().strip()
-            host = urlparse(test_url).hostname or ""
-            if not content:
-                # Local deterministic fixtures often resolve directly to loopback without
-                # emitting DNS events, so treat empty output as valid in that case.
-                assert host in {"127.0.0.1", "localhost"}, (
-                    f"DNS output unexpectedly empty for non-local host: {test_url}"
-                )
-                return
+            assert content, f"DNS output unexpectedly empty for {test_url}"
 
             records = []
-            for line in content.split('\n'):
+            for line in content.split("\n"):
                 line = line.strip()
                 if not line:
                     continue
@@ -123,9 +127,9 @@ def test_dns_records_captured(self, chrome_test_url):
                     pass
 
             assert records, "No DNS records parsed"
-            has_ip_record = any(r.get('hostname') and r.get('ip') for r in records)
+            has_ip_record = any(r.get("hostname") and r.get("ip") for r in records)
             assert has_ip_record, f"No DNS record with hostname + ip: {records}"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/dom/on_Snapshot__53_dom.js b/abx_plugins/plugins/dom/on_Snapshot__53_dom.js
index ad04db3..3e8b54f 100644
--- a/abx_plugins/plugins/dom/on_Snapshot__53_dom.js
+++ b/abx_plugins/plugins/dom/on_Snapshot__53_dom.js
@@ -18,8 +18,11 @@ if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_
 
 const {
     getEnvBool,
+    getEnvInt,
     parseArgs,
     readCdpUrl,
+    connectToPage,
+    waitForPageLoaded,
 } = require('../chrome/chrome_utils.js');
 
 // Check if DOM is enabled BEFORE requiring puppeteer
@@ -64,48 +67,26 @@ function hasStaticFileOutput() {
     return false;
 }
 
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-async function dumpDom(url) {
+async function dumpDom(url, timeoutMs) {
     // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;
-    let page = null;
 
     try {
-        // Connect to existing Chrome session (required)
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
+        if (!readCdpUrl(CHROME_SESSION_DIR)) {
             return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
         }
 
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-            defaultViewport: null,
+        const connection = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs,
+            puppeteer,
         });
+        browser = connection.browser;
+        const page = connection.page;
 
-        // Get existing pages or create new one
-        const pages = await browser.pages();
-        page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            page = await browser.newPage();
-        }
+        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
 
         // Get the full DOM content
         const domContent = await page.content();
@@ -149,18 +130,9 @@ async function main() {
             process.exit(0);
         }
 
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            throw new Error('No Chrome session found (chrome plugin must run first)');
-        }
-
-        // Wait for page to be fully loaded
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
+        const timeoutMs = getEnvInt('DOM_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
 
-        const result = await dumpDom(url);
+        const result = await dumpDom(url, timeoutMs);
 
         if (result.success) {
             // Success - emit ArchiveResult
diff --git a/abx_plugins/plugins/dom/tests/conftest.py b/abx_plugins/plugins/dom/tests/conftest.py
new file mode 100644
index 0000000..87b3198
--- /dev/null
+++ b/abx_plugins/plugins/dom/tests/conftest.py
@@ -0,0 +1,12 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def require_chrome_runtime():
+    """Require chrome runtime prerequisites for integration tests."""
+    from abx_pkg import NpmProvider
+
+    try:
+        NpmProvider()
+    except Exception as exc:
+        pytest.fail(f"Chrome integration prerequisites unavailable: {exc}")
diff --git a/abx_plugins/plugins/dom/tests/test_dom.py b/abx_plugins/plugins/dom/tests/test_dom.py
index e026859..2d07d98 100644
--- a/abx_plugins/plugins/dom/tests/test_dom.py
+++ b/abx_plugins/plugins/dom/tests/test_dom.py
@@ -14,28 +14,28 @@
 import json
 import os
 import subprocess
-import sys
 import tempfile
 from pathlib import Path
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
     get_plugin_dir,
     get_hook_script,
-    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
-    PLUGINS_ROOT,
     chrome_session,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
-NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
-TEST_URL = 'https://example.com'
+_DOM_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_dom.*")
+if _DOM_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+DOM_HOOK = _DOM_HOOK
+TEST_URL = "https://example.com"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
 
 
 def test_hook_script_exists():
@@ -45,95 +45,124 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
+    from abx_pkg import Binary, EnvProvider
 
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
     assert node_loaded and node_loaded.abspath, "Node.js required for dom plugin"
 
 
-def test_extracts_dom_from_example_com():
-    """Test full workflow: extract DOM from real example.com via hook."""
+def test_extracts_dom_from_example_com(require_chrome_runtime, chrome_test_url):
+    """Test full workflow: extract DOM from deterministic local fixture via hook."""
     # Prerequisites checked by earlier test
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            dom_dir = snapshot_chrome_dir.parent / 'dom'
+        with chrome_session(
+            tmpdir,
+            test_url=chrome_test_url,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            dom_dir = snapshot_chrome_dir.parent / "dom"
             dom_dir.mkdir(exist_ok=True)
 
             # Run DOM extraction hook
             result = subprocess.run(
-                ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+                [
+                    "node",
+                    str(DOM_HOOK),
+                    f"--url={chrome_test_url}",
+                    "--snapshot-id=test789",
+                ],
                 cwd=dom_dir,
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
 
         assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify filesystem output (hook writes directly to working dir)
-        dom_file = dom_dir / 'output.html'
-        assert dom_file.exists(), f"output.html not created. Files: {list(tmpdir.iterdir())}"
+        dom_file = dom_dir / "output.html"
+        assert dom_file.exists(), (
+            f"output.html not created. Files: {list(tmpdir.iterdir())}"
+        )
 
         # Verify HTML content contains REAL example.com text
-        html_content = dom_file.read_text(errors='ignore')
-        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
-        assert '<html' in html_content.lower(), "Missing <html> tag"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('this domain' in html_content.lower() or
-                'illustrative examples' in html_content.lower()), \
-            "Missing example.com description text"
+        html_content = dom_file.read_text(errors="ignore")
+        assert len(html_content) > 200, (
+            f"HTML content too short: {len(html_content)} bytes"
+        )
+        html_lower = html_content.lower()
+        assert "<html" in html_lower, "Missing <html> tag"
+        assert "example domain" in html_lower, "Missing 'Example Domain' in HTML"
+        assert (
+            "this domain" in html_lower
+            or "illustrative examples" in html_lower
+            or "local deterministic test page" in html_lower
+            or "chrome test helper fixture" in html_lower
+        ), "Missing expected description text in extracted HTML"
 
 
 def test_config_save_dom_false_skips():
     """Test that DOM_ENABLED=False exits without emitting JSONL."""
-    import os
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
         env = os.environ.copy()
-        env['DOM_ENABLED'] = 'False'
+        env["DOM_ENABLED"] = "False"
 
         result = subprocess.run(
-            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
+            ["node", str(DOM_HOOK), f"--url={TEST_URL}", "--snapshot-id=test999"],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping DOM' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping DOM" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_staticfile_present_skips():
@@ -141,47 +170,53 @@ def test_staticfile_present_skips():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
         snap_dir = tmpdir
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
 
         # Create directory structure like real ArchiveBox:
         # tmpdir/
         #   staticfile/  <- staticfile extractor output
         #   dom/         <- dom extractor runs here, looks for ../staticfile
-        staticfile_dir = tmpdir / 'staticfile'
+        staticfile_dir = tmpdir / "staticfile"
         staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
+        (staticfile_dir / "stdout.log").write_text(
+            '{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n'
+        )
 
-        dom_dir = tmpdir / 'dom'
+        dom_dir = tmpdir / "dom"
         dom_dir.mkdir()
 
         result = subprocess.run(
-            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=teststatic'],
+            ["node", str(DOM_HOOK), f"--url={TEST_URL}", "--snapshot-id=teststatic"],
             cwd=dom_dir,  # Run from dom subdirectory
             capture_output=True,
             text=True,
-            timeout=30
-        ,
-            env=env)
+            timeout=30,
+            env=env,
+        )
 
         assert result.returncode == 0, "Should exit 0 when permanently skipping"
 
         # Permanent skip - should emit ArchiveResult with status='skipped'
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should emit ArchiveResult JSONL for permanent skip"
-        assert result_json['status'] == 'skipped', f"Should have status='skipped': {result_json}"
-        assert 'staticfile' in result_json.get('output_str', '').lower(), "Should mention staticfile in output_str"
+        assert result_json["status"] == "skipped", (
+            f"Should have status='skipped': {result_json}"
+        )
+        assert "staticfile" in result_json.get("output_str", "").lower(), (
+            "Should mention staticfile in output_str"
+        )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/env/on_Binary__15_env_install.py b/abx_plugins/plugins/env/on_Binary__15_env_install.py
index 235dfea..7edde6c 100755
--- a/abx_plugins/plugins/env/on_Binary__15_env_install.py
+++ b/abx_plugins/plugins/env/on_Binary__15_env_install.py
@@ -22,16 +22,18 @@
 
 
 @click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to find")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict (unused)")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--binary-id", required=True, help="Dependency UUID")
+@click.option("--name", required=True, help="Binary name to find")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict (unused)")
+def main(
+    binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None
+):
     """Check if binary is available in PATH and record it."""
 
     # Check if env provider is allowed
-    if binproviders != '*' and 'env' not in binproviders.split(','):
+    if binproviders != "*" and "env" not in binproviders.split(","):
         click.echo(f"env provider not allowed for {name}", err=True)
         sys.exit(0)  # Not an error, just skip
 
@@ -47,18 +49,18 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
         click.echo(f"{name} not found in PATH", err=True)
         sys.exit(1)
 
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = machine_id.strip() or os.environ.get("MACHINE_ID", "").strip()
 
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'env',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": "env",
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
@@ -69,5 +71,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/env/tests/test_env_provider.py b/abx_plugins/plugins/env/tests/test_env_provider.py
index 907169d..d8fe9d0 100644
--- a/abx_plugins/plugins/env/tests/test_env_provider.py
+++ b/abx_plugins/plugins/env/tests/test_env_provider.py
@@ -16,7 +16,7 @@
 
 # Get the path to the env provider hook
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_env_install.py'), None)
+INSTALL_HOOK = next(PLUGIN_DIR.glob("on_Binary__*_env_install.py"), None)
 
 
 class TestEnvProviderHook:
@@ -29,6 +29,7 @@ def setup_method(self, _method=None):
     def teardown_method(self, _method=None):
         """Clean up."""
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_hook_script_exists(self):
@@ -38,34 +39,38 @@ def test_hook_script_exists(self):
     def test_hook_finds_python(self):
         """Hook should find python3 binary in PATH."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=python3',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=python3",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should succeed and output JSONL
         assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
         # Parse JSONL output
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'python3':
-                        assert record['binprovider'] == 'env'
-                        assert record['abspath']
-                        assert Path(record['abspath']).exists()
+                    if (
+                        record.get("type") == "Binary"
+                        and record.get("name") == "python3"
+                    ):
+                        assert record["binprovider"] == "env"
+                        assert record["abspath"]
+                        assert Path(record["abspath"]).exists()
                         return
                 except json.JSONDecodeError:
                     continue
@@ -75,33 +80,34 @@ def test_hook_finds_python(self):
     def test_hook_finds_bash(self):
         """Hook should find bash binary in PATH."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=bash',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=bash",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should succeed and output JSONL
         assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
         # Parse JSONL output
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'bash':
-                        assert record['binprovider'] == 'env'
-                        assert record['abspath']
+                    if record.get("type") == "Binary" and record.get("name") == "bash":
+                        assert record["binprovider"] == "env"
+                        assert record["abspath"]
                         return
                 except json.JSONDecodeError:
                     continue
@@ -111,48 +117,50 @@ def test_hook_finds_bash(self):
     def test_hook_fails_for_missing_binary(self):
         """Hook should fail for binary not in PATH."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_binary_xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=nonexistent_binary_xyz123",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should fail with exit code 1
         assert result.returncode == 1
-        assert 'not found' in result.stderr.lower()
+        assert "not found" in result.stderr.lower()
 
     def test_hook_skips_when_env_not_allowed(self):
         """Hook should skip when env not in allowed binproviders."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
+        env["SNAP_DIR"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=python3',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # env not allowed
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=python3",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                "--binproviders=pip,apt",  # env not allowed
             ],
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should exit cleanly (code 0) when env not allowed
         assert result.returncode == 0
-        assert 'env provider not allowed' in result.stderr
+        assert "env provider not allowed" in result.stderr
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/favicon/on_Snapshot__11_favicon.bg.py b/abx_plugins/plugins/favicon/on_Snapshot__11_favicon.bg.py
index ed3e320..cb4207c 100755
--- a/abx_plugins/plugins/favicon/on_Snapshot__11_favicon.bg.py
+++ b/abx_plugins/plugins/favicon/on_Snapshot__11_favicon.bg.py
@@ -3,7 +3,6 @@
 # requires-python = ">=3.12"
 # dependencies = [
 #   "rich-click",
-#   "requests",
 # ]
 # ///
 #
@@ -17,23 +16,26 @@
 import os
 import re
 import sys
+
 from pathlib import Path
+from urllib.error import HTTPError
 from urllib.parse import urljoin, urlparse
+from urllib.request import Request, urlopen
 
 import rich_click as click
 
 
 # Extractor metadata
-PLUGIN_NAME = 'favicon'
+PLUGIN_NAME = "favicon"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-OUTPUT_FILE = 'favicon.ico'
+OUTPUT_FILE = "favicon.ico"
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
@@ -44,48 +46,54 @@ def get_env_int(name: str, default: int = 0) -> int:
         return default
 
 
+def http_get(url: str, headers: dict[str, str], timeout: int) -> tuple[int, bytes]:
+    req = Request(url, headers=headers)
+    try:
+        with urlopen(req, timeout=timeout) as response:
+            return response.getcode() or 0, response.read()
+    except HTTPError as e:
+        return e.code, e.read()
+
+
 def get_favicon(url: str) -> tuple[bool, str | None, str]:
     """
     Fetch favicon from URL.
 
     Returns: (success, output_path, error_message)
     """
-    try:
-        import requests
-    except ImportError:
-        return False, None, 'requests library not installed'
 
-    timeout = get_env_int('FAVICON_TIMEOUT') or get_env_int('TIMEOUT', 30)
-    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-    headers = {'User-Agent': user_agent}
+    timeout = get_env_int("FAVICON_TIMEOUT") or get_env_int("TIMEOUT", 30)
+    user_agent = get_env("USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)")
+    headers = {"User-Agent": user_agent}
 
     # Build list of possible favicon URLs
     parsed = urlparse(url)
     base_url = f"{parsed.scheme}://{parsed.netloc}"
 
     favicon_urls = [
-        urljoin(base_url, '/favicon.ico'),
-        urljoin(base_url, '/favicon.png'),
-        urljoin(base_url, '/apple-touch-icon.png'),
+        urljoin(base_url, "/favicon.ico"),
+        urljoin(base_url, "/favicon.png"),
+        urljoin(base_url, "/apple-touch-icon.png"),
     ]
 
     # Try to extract favicon URL from HTML link tags
     try:
-        response = requests.get(url, timeout=timeout, headers=headers)
-        if response.ok:
+        status_code, body = http_get(url, headers=headers, timeout=timeout)
+        if 200 <= status_code < 300 and body:
+            html = body.decode("utf-8", errors="replace")
             # Look for <link rel="icon" href="...">
             for match in re.finditer(
                 r'<link[^>]+rel=["\'](?:shortcut )?icon["\'][^>]+href=["\']([^"\']+)["\']',
-                response.text,
-                re.I
+                html,
+                re.I,
             ):
                 favicon_urls.insert(0, urljoin(url, match.group(1)))
 
             # Also check reverse order: href before rel
             for match in re.finditer(
                 r'<link[^>]+href=["\']([^"\']+)["\'][^>]+rel=["\'](?:shortcut )?icon["\']',
-                response.text,
-                re.I
+                html,
+                re.I,
             ):
                 favicon_urls.insert(0, urljoin(url, match.group(1)))
     except Exception:
@@ -94,61 +102,61 @@ def get_favicon(url: str) -> tuple[bool, str | None, str]:
     # Try each URL until we find one that works
     for favicon_url in favicon_urls:
         try:
-            response = requests.get(favicon_url, timeout=15, headers=headers)
-            if response.ok and len(response.content) > 0:
-                Path(OUTPUT_FILE).write_bytes(response.content)
-                return True, OUTPUT_FILE, ''
+            status_code, body = http_get(favicon_url, headers=headers, timeout=15)
+            if 200 <= status_code < 300 and body:
+                Path(OUTPUT_FILE).write_bytes(body)
+                return True, OUTPUT_FILE, ""
         except Exception:
             continue
 
     # Try Google's favicon service as fallback
     try:
-        google_url = f'https://www.google.com/s2/favicons?domain={parsed.netloc}'
-        response = requests.get(google_url, timeout=15, headers=headers)
-        if response.ok and len(response.content) > 0:
-            Path(OUTPUT_FILE).write_bytes(response.content)
-            return True, OUTPUT_FILE, ''
+        google_url = f"https://www.google.com/s2/favicons?domain={parsed.netloc}"
+        status_code, body = http_get(google_url, headers=headers, timeout=15)
+        if 200 <= status_code < 300 and body:
+            Path(OUTPUT_FILE).write_bytes(body)
+            return True, OUTPUT_FILE, ""
     except Exception:
         pass
 
-    return False, None, 'No favicon found'
+    return False, None, "No favicon found"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to extract favicon from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to extract favicon from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Extract favicon from a URL."""
 
     output = None
-    status = 'failed'
-    error = ''
+    status = "failed"
+    error = ""
 
     try:
         # Run extraction
         success, output, error = get_favicon(url)
         if success:
-            status = 'succeeded'
+            status = "succeeded"
         else:
-            status = 'failed'
+            status = "failed"
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
 
     if error:
-        print(f'ERROR: {error}', file=sys.stderr)
+        print(f"ERROR: {error}", file=sys.stderr)
 
     # Output clean JSONL (no RESULT_JSON= prefix)
     result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output or error or "",
     }
     print(json.dumps(result))
 
-    sys.exit(0 if status == 'succeeded' else 1)
+    sys.exit(0 if status == "succeeded" else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/favicon/tests/test_favicon.py b/abx_plugins/plugins/favicon/tests/test_favicon.py
index 7bd3077..84228e9 100644
--- a/abx_plugins/plugins/favicon/tests/test_favicon.py
+++ b/abx_plugins/plugins/favicon/tests/test_favicon.py
@@ -24,13 +24,15 @@
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_plugin_dir,
     get_hook_script,
-    parse_jsonl_output,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
-TEST_URL = 'https://example.com'
+_FAVICON_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_favicon.*")
+if _FAVICON_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+FAVICON_HOOK = _FAVICON_HOOK
+TEST_URL = "https://example.com"
 
 
 def test_hook_script_exists():
@@ -41,9 +43,9 @@ def test_hook_script_exists():
 def test_requests_library_available():
     """Test that requests library is available."""
     result = subprocess.run(
-        [sys.executable, '-c', 'import requests; print(requests.__version__)'],
+        [sys.executable, "-c", "import requests; print(requests.__version__)"],
         capture_output=True,
-        text=True
+        text=True,
     )
 
     if result.returncode != 0:
@@ -61,27 +63,33 @@ def test_extracts_favicon_from_example_com():
 
     # Check requests is available
     check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
+        [sys.executable, "-c", "import requests"], capture_output=True
     )
     if check_result.returncode != 0:
         pass
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SNAP_DIR"] = str(snap_dir)
 
         # Run favicon extraction
         result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(FAVICON_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=60,
-            env=env
+            env=env,
         )
 
         # May succeed (if Google service works) or fail (if no favicon)
@@ -89,13 +97,13 @@ def test_extracts_favicon_from_example_com():
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
@@ -104,37 +112,40 @@ def test_extracts_favicon_from_example_com():
         assert result_json, "Should have ArchiveResult JSONL output"
 
         # If it succeeded, verify the favicon file
-        if result_json['status'] == 'succeeded':
-            favicon_file = snap_dir / 'favicon' / 'favicon.ico'
+        if result_json["status"] == "succeeded":
+            favicon_file = snap_dir / "favicon" / "favicon.ico"
             assert favicon_file.exists(), "favicon.ico not created"
 
             # Verify file is not empty and contains actual image data
             file_size = favicon_file.stat().st_size
             assert file_size > 0, "Favicon file should not be empty"
-            assert file_size < 1024 * 1024, f"Favicon file suspiciously large: {file_size} bytes"
+            assert file_size < 1024 * 1024, (
+                f"Favicon file suspiciously large: {file_size} bytes"
+            )
 
             # Check for common image magic bytes
             favicon_data = favicon_file.read_bytes()
             # ICO, PNG, GIF, JPEG, or WebP
             is_image = (
-                favicon_data[:4] == b'\x00\x00\x01\x00' or  # ICO
-                favicon_data[:8] == b'\x89PNG\r\n\x1a\n' or  # PNG
-                favicon_data[:3] == b'GIF' or  # GIF
-                favicon_data[:2] == b'\xff\xd8' or  # JPEG
-                favicon_data[8:12] == b'WEBP'  # WebP
+                favicon_data[:4] == b"\x00\x00\x01\x00"  # ICO
+                or favicon_data[:8] == b"\x89PNG\r\n\x1a\n"  # PNG
+                or favicon_data[:3] == b"GIF"  # GIF
+                or favicon_data[:2] == b"\xff\xd8"  # JPEG
+                or favicon_data[8:12] == b"WEBP"  # WebP
             )
             assert is_image, "Favicon file should be a valid image format"
         else:
             # Failed as expected
-            assert result_json['status'] == 'failed', f"Should report failure: {result_json}"
+            assert result_json["status"] == "failed", (
+                f"Should report failure: {result_json}"
+            )
 
 
 def test_config_timeout_honored():
     """Test that TIMEOUT config is respected."""
 
     check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
+        [sys.executable, "-c", "import requests"], capture_output=True
     )
     if check_result.returncode != 0:
         pass
@@ -144,17 +155,25 @@ def test_config_timeout_honored():
 
         # Set very short timeout (but example.com should still succeed)
         import os
+
         env = os.environ.copy()
-        env['TIMEOUT'] = '5'
-        env['SNAP_DIR'] = str(tmpdir)
+        env["TIMEOUT"] = "5"
+        env["SNAP_DIR"] = str(tmpdir)
 
         result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(FAVICON_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should complete (success or fail, but not hang)
@@ -165,8 +184,7 @@ def test_config_user_agent():
     """Test that USER_AGENT config is used."""
 
     check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
+        [sys.executable, "-c", "import requests"], capture_output=True
     )
     if check_result.returncode != 0:
         pass
@@ -176,45 +194,54 @@ def test_config_user_agent():
 
         # Set custom user agent
         import os
+
         env = os.environ.copy()
-        env['USER_AGENT'] = 'TestBot/1.0'
-        env['SNAP_DIR'] = str(tmpdir)
+        env["USER_AGENT"] = "TestBot/1.0"
+        env["SNAP_DIR"] = str(tmpdir)
 
         result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testua'],
+            [
+                sys.executable,
+                str(FAVICON_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testua",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Should succeed (example.com doesn't block)
         if result.returncode == 0:
             # Parse clean JSONL output
             result_json = None
-            for line in result.stdout.strip().split('\n'):
+            for line in result.stdout.strip().split("\n"):
                 line = line.strip()
-                if line.startswith('{'):
+                if line.startswith("{"):
                     pass
                     try:
                         record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
+                        if record.get("type") == "ArchiveResult":
                             result_json = record
                             break
                     except json.JSONDecodeError:
                         pass
 
             if result_json:
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+                assert result_json["status"] == "succeeded", (
+                    f"Should succeed: {result_json}"
+                )
 
 
 def test_handles_https_urls():
     """Test that HTTPS URLs work correctly."""
 
     check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
+        [sys.executable, "-c", "import requests"], capture_output=True
     )
     if check_result.returncode != 0:
         pass
@@ -223,9 +250,16 @@ def test_handles_https_urls():
         tmpdir = Path(tmpdir)
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmpdir)
+        env["SNAP_DIR"] = str(tmpdir)
         result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', 'https://example.org', '--snapshot-id', 'testhttps'],
+            [
+                sys.executable,
+                str(FAVICON_HOOK),
+                "--url",
+                "https://example.org",
+                "--snapshot-id",
+                "testhttps",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
@@ -234,7 +268,7 @@ def test_handles_https_urls():
         )
 
         if result.returncode == 0:
-            favicon_file = tmpdir / 'favicon' / 'favicon.ico'
+            favicon_file = tmpdir / "favicon" / "favicon.ico"
             if favicon_file.exists():
                 assert favicon_file.stat().st_size > 0
 
@@ -247,8 +281,7 @@ def test_handles_missing_favicon_gracefully():
     """
 
     check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
+        [sys.executable, "-c", "import requests"], capture_output=True
     )
     if check_result.returncode != 0:
         pass
@@ -258,9 +291,16 @@ def test_handles_missing_favicon_gracefully():
 
         # Try a URL that likely doesn't have a favicon
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmpdir)
+        env["SNAP_DIR"] = str(tmpdir)
         result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', 'https://example.com/nonexistent', '--snapshot-id', 'test404'],
+            [
+                sys.executable,
+                str(FAVICON_HOOK),
+                "--url",
+                "https://example.com/nonexistent",
+                "--snapshot-id",
+                "test404",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
@@ -273,7 +313,7 @@ def test_handles_missing_favicon_gracefully():
 
         if result.returncode != 0:
             combined = result.stdout + result.stderr
-            assert 'No favicon found' in combined or 'ERROR=' in combined
+            assert "No favicon found" in combined or "ERROR=" in combined
 
 
 def test_reports_missing_requests_library():
@@ -284,25 +324,38 @@ def test_reports_missing_requests_library():
 
         # Run with PYTHONPATH cleared to simulate missing requests
         import os
+
         env = os.environ.copy()
         # Keep only minimal PATH, clear PYTHONPATH
-        env['PYTHONPATH'] = '/nonexistent'
-        env['SNAP_DIR'] = str(tmpdir)
+        env["PYTHONPATH"] = "/nonexistent"
+        env["SNAP_DIR"] = str(tmpdir)
 
         result = subprocess.run(
-            [sys.executable, '-S', str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
+            [
+                sys.executable,
+                "-S",
+                str(FAVICON_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test123",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            env=env
+            env=env,
         )
 
         # Should fail and report missing requests
         if result.returncode != 0:
             combined = result.stdout + result.stderr
             # May report missing requests or other import errors
-            assert 'requests' in combined.lower() or 'import' in combined.lower() or 'ERROR=' in combined
+            assert (
+                "requests" in combined.lower()
+                or "import" in combined.lower()
+                or "ERROR=" in combined
+            )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/forumdl/config.json b/abx_plugins/plugins/forumdl/config.json
index 9e9ea10..1e7643d 100644
--- a/abx_plugins/plugins/forumdl/config.json
+++ b/abx_plugins/plugins/forumdl/config.json
@@ -27,12 +27,6 @@
       "enum": ["jsonl", "warc", "mbox", "maildir", "mh", "mmdf", "babyl"],
       "description": "Output format for forum downloads"
     },
-    "FORUMDL_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
     "FORUMDL_ARGS": {
       "type": "array",
       "items": {"type": "string"},
diff --git a/abx_plugins/plugins/forumdl/forum-dl-wrapper.py b/abx_plugins/plugins/forumdl/forum-dl-wrapper.py
deleted file mode 100755
index aa0961d..0000000
--- a/abx_plugins/plugins/forumdl/forum-dl-wrapper.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env -S uv run --script
-# /// script
-# requires-python = ">=3.12"
-# dependencies = [
-#   "forum-dl",
-#   "pydantic",
-# ]
-# ///
-#
-# Wrapper for forum-dl that applies Pydantic v2 compatibility patches.
-# Fixes forum-dl 0.3.0's incompatibility with Pydantic v2 by monkey-patching the JsonlWriter class.
-#
-# Usage:
-#     ./forum-dl-wrapper.py [...] > events.jsonl
-
-import sys
-
-# Apply Pydantic v2 compatibility patch BEFORE importing forum_dl
-try:
-    from forum_dl.writers.jsonl import JsonlWriter
-    from pydantic import BaseModel
-
-    # Check if we're using Pydantic v2
-    if hasattr(BaseModel, 'model_dump_json'):
-        def _patched_serialize_entry(self, entry):
-            """Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)"""
-            return entry.model_dump_json()
-
-        JsonlWriter._serialize_entry = _patched_serialize_entry
-except (ImportError, AttributeError):
-    # forum-dl not installed or already compatible - no patch needed
-    pass
-
-# Now import and run forum-dl's main function
-from forum_dl import main
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/abx_plugins/plugins/forumdl/on_Crawl__25_forumdl_install.py b/abx_plugins/plugins/forumdl/on_Crawl__25_forumdl_install.py
index 7e0ef78..a0e1188 100755
--- a/abx_plugins/plugins/forumdl/on_Crawl__25_forumdl_install.py
+++ b/abx_plugins/plugins/forumdl/on_Crawl__25_forumdl_install.py
@@ -13,75 +13,79 @@
 import os
 import sys
 from pathlib import Path
+from typing import Any
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
+def output_binary(
+    name: str, binproviders: str, overrides: dict[str, Any] | None = None
+) -> None:
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+    record: dict[str, Any] = {
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     if overrides:
-        record['overrides'] = overrides
+        record["overrides"] = overrides
     print(json.dumps(record))
 
 
 def main():
-    forumdl_enabled = get_env_bool('FORUMDL_ENABLED', True)
+    forumdl_enabled = get_env_bool("FORUMDL_ENABLED", True)
 
     if not forumdl_enabled:
         sys.exit(0)
 
     output_binary(
-        name='forum-dl',
-        binproviders='pip,env',
+        name="forum-dl",
+        binproviders="pip,env",
         overrides={
-            'pip': {
-                'packages': [
-                    '--no-deps',
-                    '--prefer-binary',
-                    'forum-dl',
-                    'chardet==5.2.0',
-                    'pydantic',
-                    'pydantic-core',
-                    'typing-extensions',
-                    'annotated-types',
-                    'typing-inspection',
-                    'beautifulsoup4',
-                    'soupsieve',
-                    'lxml',
-                    'requests',
-                    'urllib3',
-                    'certifi',
-                    'idna',
-                    'charset-normalizer',
-                    'tenacity',
-                    'python-dateutil',
-                    'six',
-                    'html2text',
-                    'warcio',
+            "pip": {
+                "packages": [
+                    "--no-deps",
+                    "--prefer-binary",
+                    "forum-dl",
+                    "chardet==5.2.0",
+                    "pydantic==2.12.3",
+                    "pydantic-core==2.41.4",
+                    "typing-extensions>=4.14.1",
+                    "annotated-types>=0.6.0",
+                    "typing-inspection>=0.4.2",
+                    "beautifulsoup4",
+                    "soupsieve",
+                    "lxml",
+                    "requests",
+                    "urllib3",
+                    "certifi",
+                    "idna",
+                    "charset-normalizer",
+                    "tenacity",
+                    "python-dateutil",
+                    "six",
+                    "html2text",
+                    "warcio",
                 ]
             }
         },
@@ -90,5 +94,5 @@ def main():
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/forumdl/on_Snapshot__04_forumdl.bg.py b/abx_plugins/plugins/forumdl/on_Snapshot__04_forumdl.bg.py
index b67151e..36436e1 100755
--- a/abx_plugins/plugins/forumdl/on_Snapshot__04_forumdl.bg.py
+++ b/abx_plugins/plugins/forumdl/on_Snapshot__04_forumdl.bg.py
@@ -19,51 +19,33 @@
 import shutil
 import subprocess
 import sys
+import textwrap
 import threading
 from pathlib import Path
 
 import rich_click as click
 
 
-# Monkey patch forum-dl for Pydantic v2 compatibility
-# forum-dl 0.3.0 uses deprecated json(models_as_dict=False) which doesn't work in Pydantic v2
-try:
-    from forum_dl.writers.jsonl import JsonlWriter
-    from pydantic import BaseModel
-
-    # Check if we're using Pydantic v2 (has model_dump_json)
-    if hasattr(BaseModel, 'model_dump_json'):
-        # Patch JsonlWriter to use Pydantic v2 API
-        original_serialize = JsonlWriter._serialize_entry
-
-        def _patched_serialize_entry(self, entry):
-            # Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)
-            return entry.model_dump_json()
-
-        JsonlWriter._serialize_entry = _patched_serialize_entry
-except (ImportError, AttributeError):
-    # forum-dl not installed or already compatible
-    pass
-
-
 # Extractor metadata
-PLUGIN_NAME = 'forumdl'
-BIN_NAME = 'forum-dl'
-BIN_PROVIDERS = 'pip,env'
+PLUGIN_NAME = "forumdl"
+BIN_NAME = "forum-dl"
+BIN_PROVIDERS = "pip,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -77,7 +59,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -92,10 +74,10 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
 def get_binary_shebang(binary_path: str) -> str | None:
     """Return interpreter from shebang line if present (e.g., /path/to/python)."""
     try:
-        with open(binary_path, 'r', encoding='utf-8') as f:
+        with open(binary_path, "r", encoding="utf-8") as f:
             first_line = f.readline().strip()
-            if first_line.startswith('#!'):
-                return first_line[2:].strip().split(' ')[0]
+            if first_line.startswith("#!"):
+                return first_line[2:].strip().split(" ")[0]
     except Exception:
         pass
     return None
@@ -110,7 +92,6 @@ def resolve_binary_path(binary: str) -> str | None:
     return shutil.which(binary)
 
 
-
 def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
     """
     Download forum using forum-dl.
@@ -118,38 +99,57 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
     Returns: (success, output_path, error_message)
     """
     # Get config from env (with FORUMDL_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('FORUMDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('FORUMDL_CHECK_SSL_VALIDITY', True) if get_env('FORUMDL_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    forumdl_args = get_env_array('FORUMDL_ARGS', [])
-    forumdl_args_extra = get_env_array('FORUMDL_ARGS_EXTRA', [])
-    output_format = get_env('FORUMDL_OUTPUT_FORMAT', 'jsonl')
+    timeout = get_env_int("FORUMDL_TIMEOUT") or get_env_int("TIMEOUT", 3600)
+    forumdl_args = get_env_array("FORUMDL_ARGS", [])
+    forumdl_args_extra = get_env_array("FORUMDL_ARGS_EXTRA", [])
+    output_format = get_env("FORUMDL_OUTPUT_FORMAT", "jsonl")
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
 
     # Build output filename based on format
-    if output_format == 'warc':
-        output_file = output_dir / 'forum.warc.gz'
-    elif output_format == 'jsonl':
-        output_file = output_dir / 'forum.jsonl'
-    elif output_format == 'maildir':
-        output_file = output_dir / 'forum'  # maildir is a directory
-    elif output_format in ('mbox', 'mh', 'mmdf', 'babyl'):
-        output_file = output_dir / f'forum.{output_format}'
+    if output_format == "warc":
+        output_file = output_dir / "forum.warc.gz"
+    elif output_format == "jsonl":
+        output_file = output_dir / "forum.jsonl"
+    elif output_format == "maildir":
+        output_file = output_dir / "forum"  # maildir is a directory
+    elif output_format in ("mbox", "mh", "mmdf", "babyl"):
+        output_file = output_dir / f"forum.{output_format}"
     else:
-        output_file = output_dir / f'forum.{output_format}'
+        output_file = output_dir / f"forum.{output_format}"
 
-    # Use our Pydantic v2 compatible wrapper if available, otherwise fall back to binary
-    wrapper_path = Path(__file__).parent / 'forum-dl-wrapper.py'
     resolved_binary = resolve_binary_path(binary) or binary
-    if wrapper_path.exists():
-        forumdl_python = get_binary_shebang(resolved_binary) or sys.executable
-        cmd = [forumdl_python, str(wrapper_path), *forumdl_args, '-f', output_format, '-o', str(output_file)]
-    else:
-        cmd = [resolved_binary, *forumdl_args, '-f', output_format, '-o', str(output_file)]
-
-    if not check_ssl:
-        cmd.append('--no-check-certificate')
+    forumdl_python = get_binary_shebang(resolved_binary) or sys.executable
+    # Inline compatibility shim so this hook stays self-contained.
+    # Always run through this shim so forum-dl serialization stays compatible
+    # with Pydantic v2 even when binary shebang detection fails.
+    inline_entrypoint = textwrap.dedent(
+        """
+        import sys
+        try:
+            from forum_dl.writers.jsonl import JsonlWriter
+            from pydantic import BaseModel
+            if hasattr(BaseModel, "model_dump_json"):
+                def _patched_serialize_entry(self, entry):
+                    return entry.model_dump_json()
+                JsonlWriter._serialize_entry = _patched_serialize_entry
+        except Exception:
+            pass
+        from forum_dl import main
+        raise SystemExit(main())
+        """
+    ).strip()
+    cmd = [
+        forumdl_python,
+        "-c",
+        inline_entrypoint,
+        *forumdl_args,
+        "-f",
+        output_format,
+        "-o",
+        str(output_file),
+    ]
 
     if forumdl_args_extra:
         cmd.extend(forumdl_args_extra)
@@ -157,7 +157,7 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
     cmd.append(url)
 
     try:
-        print(f'[forumdl] Starting download (timeout={timeout}s)', file=sys.stderr)
+        print(f"[forumdl] Starting download (timeout={timeout}s)", file=sys.stderr)
         output_lines: list[str] = []
         process = subprocess.Popen(
             cmd,
@@ -182,63 +182,70 @@ def _read_output() -> None:
         except subprocess.TimeoutExpired:
             process.kill()
             reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
+            return False, None, f"Timed out after {timeout} seconds"
 
         reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
+        combined_output = "".join(output_lines)
 
         # Check if output file was created
         if output_file.exists() and output_file.stat().st_size > 0:
-            return True, str(output_file), ''
+            return True, str(output_file), ""
         else:
             stderr = combined_output
 
             # These are NOT errors - page simply has no downloadable forum content
             stderr_lower = stderr.lower()
-            if 'unsupported url' in stderr_lower:
-                return True, None, ''  # Not a forum site - success, no output
-            if 'no content' in stderr_lower:
-                return True, None, ''  # No forum found - success, no output
-            if 'extractornotfounderror' in stderr_lower:
-                return True, None, ''  # No forum extractor for this URL - success, no output
+            if "unsupported url" in stderr_lower:
+                return True, None, ""  # Not a forum site - success, no output
+            if "no content" in stderr_lower:
+                return True, None, ""  # No forum found - success, no output
+            if "extractornotfounderror" in stderr_lower:
+                return (
+                    True,
+                    None,
+                    "",
+                )  # No forum extractor for this URL - success, no output
             if process.returncode == 0:
-                return True, None, ''  # forum-dl exited cleanly, just no forum - success
+                return (
+                    True,
+                    None,
+                    "",
+                )  # forum-dl exited cleanly, just no forum - success
 
             # These ARE errors - something went wrong
-            if '404' in stderr:
-                return False, None, '404 Not Found'
-            if '403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'unable to extract' in stderr_lower:
-                return False, None, 'Unable to extract forum info'
+            if "404" in stderr:
+                return False, None, "404 Not Found"
+            if "403" in stderr:
+                return False, None, "403 Forbidden"
+            if "unable to extract" in stderr_lower:
+                return False, None, "Unable to extract forum info"
 
-            return False, None, f'forum-dl error: {stderr}'
+            return False, None, f"forum-dl error: {stderr}"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to download forum from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to download forum from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Download forum content from a URL using forum-dl."""
 
     output = None
-    status = 'failed'
-    error = ''
+    error = ""
 
     try:
         # Check if forum-dl is enabled
-        if not get_env_bool('FORUMDL_ENABLED', True):
-            print('Skipping forum-dl (FORUMDL_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("FORUMDL_ENABLED", True):
+            print("Skipping forum-dl (FORUMDL_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('FORUMDL_BINARY', 'forum-dl')
+        binary = get_env("FORUMDL_BINARY", "forum-dl")
 
         # Run extraction
         success, output, error = save_forum(url, binary)
@@ -246,22 +253,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/forumdl/tests/test_forumdl.py b/abx_plugins/plugins/forumdl/tests/test_forumdl.py
index b71eb08..8528d8e 100644
--- a/abx_plugins/plugins/forumdl/tests/test_forumdl.py
+++ b/abx_plugins/plugins/forumdl/tests/test_forumdl.py
@@ -24,13 +24,28 @@
 
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
-FORUMDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_forumdl.*'), None)
-TEST_URL = 'https://example.com'
+_FORUMDL_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_forumdl.*"), None)
+if _FORUMDL_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+FORUMDL_HOOK = _FORUMDL_HOOK
+TEST_URL = "http://example.com"
 
 # Module-level cache for binary path
 _forumdl_binary_path = None
 _forumdl_lib_root = None
 
+
+def require_forumdl_binary() -> str:
+    """Return forum-dl binary path or fail with actionable context."""
+    binary_path = get_forumdl_binary_path()
+    assert binary_path, (
+        "forum-dl installation failed. Install hook should install forum-dl automatically "
+        "with macOS-compatible dependencies."
+    )
+    assert Path(binary_path).is_file(), f"forum-dl binary path invalid: {binary_path}"
+    return binary_path
+
+
 def get_forumdl_binary_path():
     """Get the installed forum-dl binary path from cache or by running installation."""
     global _forumdl_binary_path
@@ -38,12 +53,11 @@ def get_forumdl_binary_path():
         return _forumdl_binary_path
 
     # Try to find forum-dl binary using abx-pkg
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
+    from abx_pkg import Binary, PipProvider, EnvProvider
 
     try:
         binary = Binary(
-            name='forum-dl',
-            binproviders=[PipProvider(), EnvProvider()]
+            name="forum-dl", binproviders=[PipProvider(), EnvProvider()]
         ).load()
 
         if binary and binary.abspath:
@@ -53,8 +67,8 @@ def get_forumdl_binary_path():
         pass
 
     # If not found, try to install via pip using the crawl hook overrides
-    pip_hook = PLUGINS_ROOT / 'pip' / 'on_Binary__11_pip_install.py'
-    crawl_hook = PLUGIN_DIR / 'on_Crawl__25_forumdl_install.py'
+    pip_hook = PLUGINS_ROOT / "pip" / "on_Binary__11_pip_install.py"
+    crawl_hook = PLUGIN_DIR / "on_Crawl__25_forumdl_install.py"
     if pip_hook.exists():
         binary_id = str(uuid.uuid4())
         machine_id = str(uuid.uuid4())
@@ -67,12 +81,15 @@ def get_forumdl_binary_path():
                 text=True,
                 timeout=30,
             )
-            for crawl_line in crawl_result.stdout.strip().split('\n'):
-                if crawl_line.strip().startswith('{'):
+            for crawl_line in crawl_result.stdout.strip().split("\n"):
+                if crawl_line.strip().startswith("{"):
                     try:
                         crawl_record = json.loads(crawl_line)
-                        if crawl_record.get('type') == 'Binary' and crawl_record.get('name') == 'forum-dl':
-                            overrides = crawl_record.get('overrides')
+                        if (
+                            crawl_record.get("type") == "Binary"
+                            and crawl_record.get("name") == "forum-dl"
+                        ):
+                            overrides = crawl_record.get("overrides")
                             break
                     except json.JSONDecodeError:
                         continue
@@ -80,20 +97,24 @@ def get_forumdl_binary_path():
         # Create a persistent temp HOME for default LIB_DIR usage
         global _forumdl_lib_root
         if not _forumdl_lib_root:
-            _forumdl_lib_root = tempfile.mkdtemp(prefix='forumdl-lib-')
+            _forumdl_lib_root = tempfile.mkdtemp(prefix="forumdl-lib-")
         env = os.environ.copy()
-        env['HOME'] = str(_forumdl_lib_root)
-        env['SNAP_DIR'] = str(Path(_forumdl_lib_root) / 'data')
-        env.pop('LIB_DIR', None)
+        env["HOME"] = str(_forumdl_lib_root)
+        env["SNAP_DIR"] = str(Path(_forumdl_lib_root) / "data")
+        env.pop("LIB_DIR", None)
 
         cmd = [
-            sys.executable, str(pip_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'forum-dl'
+            sys.executable,
+            str(pip_hook),
+            "--binary-id",
+            binary_id,
+            "--machine-id",
+            machine_id,
+            "--name",
+            "forum-dl",
         ]
         if overrides:
-            cmd.append(f'--overrides={json.dumps(overrides)}')
+            cmd.append(f"--overrides={json.dumps(overrides)}")
 
         install_result = subprocess.run(
             cmd,
@@ -104,12 +125,15 @@ def get_forumdl_binary_path():
         )
 
         # Parse Binary from pip installation
-        for install_line in install_result.stdout.strip().split('\n'):
+        for install_line in install_result.stdout.strip().split("\n"):
             if install_line.strip():
                 try:
                     install_record = json.loads(install_line)
-                    if install_record.get('type') == 'Binary' and install_record.get('name') == 'forum-dl':
-                        _forumdl_binary_path = install_record.get('abspath')
+                    if (
+                        install_record.get("type") == "Binary"
+                        and install_record.get("name") == "forum-dl"
+                    ):
+                        _forumdl_binary_path = install_record.get("abspath")
                         return _forumdl_binary_path
                 except json.JSONDecodeError:
                     pass
@@ -124,62 +148,66 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify forum-dl is installed by calling the REAL installation hooks."""
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        assert False, (
-            "forum-dl installation failed. Install hook should install forum-dl automatically. "
-            "Note: forum-dl has a dependency on cchardet which may not compile on Python 3.14+ "
-            "due to removed longintrepr.h header."
-        )
-    assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
+    binary_path = require_forumdl_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
+    )
 
 
-def test_handles_non_forum_url():
+def test_handles_non_forum_url(local_http_base_url):
     """Test that forum-dl extractor handles non-forum URLs gracefully via hook."""
     import os
 
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        pass
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
+    binary_path = require_forumdl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-        env['SNAP_DIR'] = str(tmpdir)
-        env.pop('LIB_DIR', None)
+        env["FORUMDL_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
+        env.pop("LIB_DIR", None)
 
         # Run forum-dl extraction hook on non-forum URL
         result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(FORUMDL_HOOK),
+                "--url",
+                local_http_base_url,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Should exit 0 even for non-forum URL (graceful handling)
-        assert result.returncode == 0, f"Should handle non-forum URL gracefully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should handle non-forum URL gracefully: {result.stderr}"
+        )
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed even for non-forum URL: {result_json}"
+        assert result_json["status"] == "succeeded", (
+            f"Should succeed even for non-forum URL: {result_json}"
+        )
 
 
 def test_config_save_forumdl_false_skips():
@@ -188,59 +216,84 @@ def test_config_save_forumdl_false_skips():
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['FORUMDL_ENABLED'] = 'False'
-        env['SNAP_DIR'] = str(tmpdir)
-        env.pop('LIB_DIR', None)
+        env["FORUMDL_ENABLED"] = "False"
+        env["SNAP_DIR"] = str(tmpdir)
+        env.pop("LIB_DIR", None)
 
         result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(FORUMDL_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_config_timeout():
     """Test that FORUMDL_TIMEOUT config is respected."""
     import os
 
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        pass
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
+    binary_path = require_forumdl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-        env['FORUMDL_TIMEOUT'] = '5'
-        env['SNAP_DIR'] = str(tmpdir)
-        env.pop('LIB_DIR', None)
+        env["FORUMDL_BINARY"] = binary_path
+        env["FORUMDL_TIMEOUT"] = "5"
+        env["SNAP_DIR"] = str(tmpdir)
+        env.pop("LIB_DIR", None)
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(FORUMDL_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
+            timeout=10,  # Should complete in 5s, use 10s as safety margin
         )
         elapsed_time = time.time() - start_time
 
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should complete without hanging: {result.stderr}"
+        )
         # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        assert elapsed_time <= 6.0, (
+            f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        )
 
 
 def test_real_forum_url():
@@ -250,67 +303,80 @@ def test_real_forum_url():
     """
     import os
 
-    binary_path = get_forumdl_binary_path()
-    assert binary_path, "forum-dl binary not available"
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
+    binary_path = require_forumdl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         # Use HackerNews - one of the most reliable forum-dl extractors
-        forum_url = 'https://news.ycombinator.com/item?id=1'
+        forum_url = "https://news.ycombinator.com/item?id=1"
 
         env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-        env['FORUMDL_TIMEOUT'] = '60'
-        env['FORUMDL_OUTPUT_FORMAT'] = 'jsonl'  # Use jsonl format
-        env['SNAP_DIR'] = str(tmpdir)
-        env.pop('LIB_DIR', None)
+        env["FORUMDL_BINARY"] = binary_path
+        env["FORUMDL_TIMEOUT"] = "60"
+        env["FORUMDL_OUTPUT_FORMAT"] = "jsonl"  # Use jsonl format
+        env["SNAP_DIR"] = str(tmpdir)
+        env.pop("LIB_DIR", None)
         # HTML output could be added via: env['FORUMDL_ARGS_EXTRA'] = json.dumps(['--files-output', './files'])
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', forum_url, '--snapshot-id', 'testforum'],
+            [
+                sys.executable,
+                str(FORUMDL_HOOK),
+                "--url",
+                forum_url,
+                "--snapshot-id",
+                "testforum",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=90
+            timeout=90,
         )
         elapsed_time = time.time() - start_time
 
         # Should succeed with our Pydantic v2 wrapper
-        assert result.returncode == 0, f"Should extract forum successfully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should extract forum successfully: {result.stderr}"
+        )
 
         # Parse JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json, (
+            f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
+        )
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Check that forum files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
+        output_files = list(tmpdir.glob("**/*"))
         forum_files = [f for f in output_files if f.is_file()]
 
-        assert len(forum_files) > 0, f"Should have downloaded at least one forum file. Files: {output_files}"
+        assert len(forum_files) > 0, (
+            f"Should have downloaded at least one forum file. Files: {output_files}"
+        )
 
         # Verify the JSONL file has content
-        jsonl_file = tmpdir / 'forumdl' / 'forum.jsonl'
+        jsonl_file = tmpdir / "forumdl" / "forum.jsonl"
         assert jsonl_file.exists(), "Should have created forum.jsonl"
         assert jsonl_file.stat().st_size > 0, "forum.jsonl should not be empty"
 
-        print(f"Successfully extracted {len(forum_files)} file(s) in {elapsed_time:.2f}s")
+        print(
+            f"Successfully extracted {len(forum_files)} file(s) in {elapsed_time:.2f}s"
+        )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/gallerydl/on_Crawl__20_gallerydl_install.py b/abx_plugins/plugins/gallerydl/on_Crawl__20_gallerydl_install.py
index 9a9f79c..9ce27d2 100755
--- a/abx_plugins/plugins/gallerydl/on_Crawl__20_gallerydl_install.py
+++ b/abx_plugins/plugins/gallerydl/on_Crawl__20_gallerydl_install.py
@@ -15,47 +15,48 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
 
 def main():
-    gallerydl_enabled = get_env_bool('GALLERYDL_ENABLED', default=True)
+    gallerydl_enabled = get_env_bool("GALLERYDL_ENABLED", default=True)
 
     if not gallerydl_enabled:
         sys.exit(0)
 
-    output_binary(name='gallery-dl', binproviders='pip,brew,apt,env')
+    output_binary(name="gallery-dl", binproviders="pip,brew,apt,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py b/abx_plugins/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py
index 1cf6468..c393d68 100755
--- a/abx_plugins/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py
+++ b/abx_plugins/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py
@@ -23,23 +23,25 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'gallerydl'
-BIN_NAME = 'gallery-dl'
-BIN_PROVIDERS = 'pip,env'
+PLUGIN_NAME = "gallerydl"
+BIN_NAME = "gallery-dl"
+BIN_PROVIDERS = "pip,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -53,7 +55,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -65,25 +67,29 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
         return default if default is not None else []
 
 
-STATICFILE_DIR = '../staticfile'
+STATICFILE_DIR = "../staticfile"
+
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
     staticfile_dir = Path(STATICFILE_DIR)
     if not staticfile_dir.exists():
         return False
-    stdout_log = staticfile_dir / 'stdout.log'
+    stdout_log = staticfile_dir / "stdout.log"
     if not stdout_log.exists():
         return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
+    for line in stdout_log.read_text(errors="ignore").splitlines():
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             record = json.loads(line)
         except json.JSONDecodeError:
             continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
+        if (
+            record.get("type") == "ArchiveResult"
+            and record.get("status") == "succeeded"
+        ):
             return True
     return False
 
@@ -95,11 +101,15 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
     Returns: (success, output_path, error_message)
     """
     # Get config from env (with GALLERYDL_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('GALLERYDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('GALLERYDL_CHECK_SSL_VALIDITY', True) if get_env('GALLERYDL_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    gallerydl_args = get_env_array('GALLERYDL_ARGS', [])
-    gallerydl_args_extra = get_env_array('GALLERYDL_ARGS_EXTRA', [])
-    cookies_file = get_env('GALLERYDL_COOKIES_FILE') or get_env('COOKIES_FILE', '')
+    timeout = get_env_int("GALLERYDL_TIMEOUT") or get_env_int("TIMEOUT", 3600)
+    check_ssl = (
+        get_env_bool("GALLERYDL_CHECK_SSL_VALIDITY", True)
+        if get_env("GALLERYDL_CHECK_SSL_VALIDITY")
+        else get_env_bool("CHECK_SSL_VALIDITY", True)
+    )
+    gallerydl_args = get_env_array("GALLERYDL_ARGS", [])
+    gallerydl_args_extra = get_env_array("GALLERYDL_ARGS_EXTRA", [])
+    cookies_file = get_env("GALLERYDL_COOKIES_FILE") or get_env("COOKIES_FILE", "")
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
@@ -109,14 +119,15 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
     cmd = [
         binary,
         *gallerydl_args,
-        '-D', str(output_dir),
+        "-D",
+        str(output_dir),
     ]
 
     if not check_ssl:
-        cmd.append('--no-check-certificate')
+        cmd.append("--no-check-certificate")
 
     if cookies_file and Path(cookies_file).exists():
-        cmd.extend(['-C', cookies_file])
+        cmd.extend(["-C", cookies_file])
 
     if gallerydl_args_extra:
         cmd.extend(gallerydl_args_extra)
@@ -124,7 +135,7 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
     cmd.append(url)
 
     try:
-        print(f'[gallerydl] Starting download (timeout={timeout}s)', file=sys.stderr)
+        print(f"[gallerydl] Starting download (timeout={timeout}s)", file=sys.stderr)
         output_lines: list[str] = []
         process = subprocess.Popen(
             cmd,
@@ -149,89 +160,115 @@ def _read_output() -> None:
         except subprocess.TimeoutExpired:
             process.kill()
             reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
+            return False, None, f"Timed out after {timeout} seconds"
 
         reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
+        combined_output = "".join(output_lines)
 
         # Check if any gallery files were downloaded (search recursively)
         gallery_extensions = (
-            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg',
-            '.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv',
-            '.json', '.txt', '.zip',
+            ".jpg",
+            ".jpeg",
+            ".png",
+            ".gif",
+            ".webp",
+            ".bmp",
+            ".svg",
+            ".mp4",
+            ".webm",
+            ".mkv",
+            ".avi",
+            ".mov",
+            ".flv",
+            ".json",
+            ".txt",
+            ".zip",
         )
 
         downloaded_files = [
-            f for f in output_dir.rglob('*')
+            f
+            for f in output_dir.rglob("*")
             if f.is_file() and f.suffix.lower() in gallery_extensions
         ]
 
         if downloaded_files:
             # Return first image file, or first file if no images
             image_files = [
-                f for f in downloaded_files
-                if f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
+                f
+                for f in downloaded_files
+                if f.suffix.lower()
+                in (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
             ]
             output = str(image_files[0]) if image_files else str(downloaded_files[0])
-            return True, output, ''
+            return True, output, ""
         else:
             stderr = combined_output
 
             # These are NOT errors - page simply has no downloadable gallery
             # Return success with no output (legitimate "nothing to download")
             stderr_lower = stderr.lower()
-            if 'unsupported url' in stderr_lower:
-                return True, None, ''  # Not a gallery site - success, no output
-            if 'no results' in stderr_lower:
-                return True, None, ''  # No gallery found - success, no output
+            if "unsupported url" in stderr_lower:
+                return True, None, ""  # Not a gallery site - success, no output
+            if "no results" in stderr_lower:
+                return True, None, ""  # No gallery found - success, no output
             if process.returncode == 0:
-                return True, None, ''  # gallery-dl exited cleanly, just no gallery - success
+                return (
+                    True,
+                    None,
+                    "",
+                )  # gallery-dl exited cleanly, just no gallery - success
 
             # These ARE errors - something went wrong
-            if '404' in stderr:
-                return False, None, '404 Not Found'
-            if '403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'unable to extract' in stderr_lower:
-                return False, None, 'Unable to extract gallery info'
+            if "404" in stderr:
+                return False, None, "404 Not Found"
+            if "403" in stderr:
+                return False, None, "403 Forbidden"
+            if "unable to extract" in stderr_lower:
+                return False, None, "Unable to extract gallery info"
 
-            return False, None, f'gallery-dl error: {stderr}'
+            return False, None, f"gallery-dl error: {stderr}"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to download gallery from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to download gallery from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Download image gallery from a URL using gallery-dl."""
 
     output = None
-    status = 'failed'
-    error = ''
+    error = ""
 
     try:
         # Check if gallery-dl is enabled
-        if not get_env_bool('GALLERYDL_ENABLED', True):
-            print('Skipping gallery-dl (GALLERYDL_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("GALLERYDL_ENABLED", True):
+            print("Skipping gallery-dl (GALLERYDL_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Check if staticfile extractor already handled this (permanent skip)
         if has_staticfile_output():
-            print(f'Skipping gallery-dl - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({
-                'type': 'ArchiveResult',
-                'status': 'skipped',
-                'output_str': 'staticfile already handled',
-            }))
+            print(
+                "Skipping gallery-dl - staticfile extractor already downloaded this",
+                file=sys.stderr,
+            )
+            print(
+                json.dumps(
+                    {
+                        "type": "ArchiveResult",
+                        "status": "skipped",
+                        "output_str": "staticfile already handled",
+                    }
+                )
+            )
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('GALLERYDL_BINARY', 'gallery-dl')
+        binary = get_env("GALLERYDL_BINARY", "gallery-dl")
 
         # Run extraction
         success, output, error = save_gallery(url, binary)
@@ -239,22 +276,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/gallerydl/tests/test_gallerydl.py b/abx_plugins/plugins/gallerydl/tests/test_gallerydl.py
index 7feedb1..83036f3 100644
--- a/abx_plugins/plugins/gallerydl/tests/test_gallerydl.py
+++ b/abx_plugins/plugins/gallerydl/tests/test_gallerydl.py
@@ -17,13 +17,125 @@
 import sys
 import tempfile
 import time
+import os
+import uuid
 from pathlib import Path
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
-GALLERYDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_gallerydl.*'), None)
-TEST_URL = 'https://example.com'
+_GALLERYDL_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_gallerydl.*"), None)
+if _GALLERYDL_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+GALLERYDL_HOOK = _GALLERYDL_HOOK
+TEST_URL = "https://example.com"
+
+# Module-level cache for binary path
+_gallerydl_binary_path = None
+_gallerydl_lib_root = None
+
+
+def require_gallerydl_binary() -> str:
+    """Return gallery-dl binary path or fail with actionable context."""
+    binary_path = get_gallerydl_binary_path()
+    assert binary_path, (
+        "gallery-dl installation failed. Install hook should install gallery-dl "
+        "automatically in this test environment."
+    )
+    assert Path(binary_path).is_file(), f"gallery-dl binary path invalid: {binary_path}"
+    return binary_path
+
+
+def get_gallerydl_binary_path():
+    """Get gallery-dl binary path from cache or by running install hooks."""
+    global _gallerydl_binary_path
+    if _gallerydl_binary_path and Path(_gallerydl_binary_path).is_file():
+        return _gallerydl_binary_path
+
+    # Try loading from existing providers first
+    from abx_pkg import Binary, PipProvider, EnvProvider
+
+    try:
+        binary = Binary(
+            name="gallery-dl", binproviders=[PipProvider(), EnvProvider()]
+        ).load()
+        if binary and binary.abspath:
+            _gallerydl_binary_path = str(binary.abspath)
+            return _gallerydl_binary_path
+    except Exception:
+        pass
+
+    # Install via real plugin hooks
+    pip_hook = PLUGINS_ROOT / "pip" / "on_Binary__11_pip_install.py"
+    crawl_hook = PLUGIN_DIR / "on_Crawl__20_gallerydl_install.py"
+    if not pip_hook.exists():
+        return None
+
+    binary_id = str(uuid.uuid4())
+    machine_id = str(uuid.uuid4())
+    overrides = None
+
+    if crawl_hook.exists():
+        crawl_result = subprocess.run(
+            [sys.executable, str(crawl_hook)],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        for line in crawl_result.stdout.strip().split("\n"):
+            if not line.strip().startswith("{"):
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if record.get("type") == "Binary" and record.get("name") == "gallery-dl":
+                overrides = record.get("overrides")
+                break
+
+    global _gallerydl_lib_root
+    if not _gallerydl_lib_root:
+        _gallerydl_lib_root = tempfile.mkdtemp(prefix="gallerydl-lib-")
+
+    env = os.environ.copy()
+    env["HOME"] = str(_gallerydl_lib_root)
+    env["SNAP_DIR"] = str(Path(_gallerydl_lib_root) / "data")
+    env.pop("LIB_DIR", None)
+
+    cmd = [
+        sys.executable,
+        str(pip_hook),
+        "--binary-id",
+        binary_id,
+        "--machine-id",
+        machine_id,
+        "--name",
+        "gallery-dl",
+    ]
+    if overrides:
+        cmd.append(f"--overrides={json.dumps(overrides)}")
+
+    install_result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=300,
+        env=env,
+    )
+
+    for line in install_result.stdout.strip().split("\n"):
+        if not line.strip().startswith("{"):
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if record.get("type") == "Binary" and record.get("name") == "gallery-dl":
+            _gallerydl_binary_path = record.get("abspath")
+            return _gallerydl_binary_path
+
+    return None
+
 
 def test_hook_script_exists():
     """Verify on_Snapshot hook exists."""
@@ -31,56 +143,61 @@ def test_hook_script_exists():
 
 
 def test_verify_deps_with_abx_pkg():
-    """Verify gallery-dl is available via abx-pkg."""
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
-
-    missing_binaries = []
-
-    # Verify gallery-dl is available
-    gallerydl_binary = Binary(name='gallery-dl', binproviders=[PipProvider(), EnvProvider()])
-    gallerydl_loaded = gallerydl_binary.load()
-    if not (gallerydl_loaded and gallerydl_loaded.abspath):
-        missing_binaries.append('gallery-dl')
-
-    if missing_binaries:
-        pass
+    """Verify gallery-dl is installed by real plugin install hooks."""
+    binary_path = require_gallerydl_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
+    )
 
 
 def test_handles_non_gallery_url():
     """Test that gallery-dl extractor handles non-gallery URLs gracefully via hook."""
-    # Prerequisites checked by earlier test
+    binary_path = require_gallerydl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env["GALLERYDL_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
 
         # Run gallery-dl extraction hook on non-gallery URL
         result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(GALLERYDL_HOOK),
+                "--url",
+                "https://example.com",
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=60
+            env=env,
+            timeout=60,
         )
 
         # Should exit 0 even for non-gallery URL
-        assert result.returncode == 0, f"Should handle non-gallery URL gracefully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should handle non-gallery URL gracefully: {result.stderr}"
+        )
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
 
 def test_config_save_gallery_dl_false_skips():
@@ -89,102 +206,186 @@ def test_config_save_gallery_dl_false_skips():
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['GALLERYDL_ENABLED'] = 'False'
+        env["GALLERYDL_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(GALLERYDL_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_config_timeout():
     """Test that GALLERY_DL_TIMEOUT config is respected."""
     import os
 
+    binary_path = require_gallerydl_binary()
+
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['GALLERY_DL_TIMEOUT'] = '5'
+        env["GALLERY_DL_TIMEOUT"] = "5"
+        env["GALLERYDL_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(GALLERYDL_HOOK),
+                "--url",
+                "https://example.com",
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
+            timeout=10,  # Should complete in 5s, use 10s as safety margin
         )
         elapsed_time = time.time() - start_time
 
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should complete without hanging: {result.stderr}"
+        )
         # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        assert elapsed_time <= 6.0, (
+            f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        )
 
 
 def test_real_gallery_url():
     """Test that gallery-dl can extract images from a real Flickr gallery URL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Use a real Flickr photo page
-        gallery_url = 'https://www.flickr.com/photos/gregorydolivet/55002388567/in/explore-2025-12-25/'
-
-        env = os.environ.copy()
-        env['GALLERY_DL_TIMEOUT'] = '60'  # Give it time to download
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', gallery_url, '--snapshot-id', 'testflickr'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=90
-        )
-        elapsed_time = time.time() - start_time
-
-        # Should succeed
-        assert result.returncode == 0, f"Should extract gallery successfully: {result.stderr}"
-
-        # Parse JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Check that some files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
-        image_files = [f for f in output_files if f.is_file() and f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp')]
-
-        assert len(image_files) > 0, f"Should have downloaded at least one image. Files: {output_files}"
-
-        print(f"Successfully extracted {len(image_files)} image(s) in {elapsed_time:.2f}s")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+    binary_path = require_gallerydl_binary()
+
+    # Real public gallery URL that currently yields downloadable media.
+    gallery_url = "https://www.flickr.com/photos/gregorydolivet/55002388567/in/explore-2025-12-25/"
+
+    max_attempts = 3
+    last_error = ""
+
+    for attempt in range(1, max_attempts + 1):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmpdir = Path(tmpdir)
+            env = os.environ.copy()
+            env["GALLERYDL_TIMEOUT"] = "60"
+            env["GALLERYDL_BINARY"] = binary_path
+            env["SNAP_DIR"] = str(tmpdir)
+
+            start_time = time.time()
+            result = subprocess.run(
+                [
+                    sys.executable,
+                    str(GALLERYDL_HOOK),
+                    "--url",
+                    gallery_url,
+                    "--snapshot-id",
+                    f"testflickr{attempt}",
+                ],
+                cwd=tmpdir,
+                capture_output=True,
+                text=True,
+                env=env,
+                timeout=90,
+            )
+            elapsed_time = time.time() - start_time
+
+            if result.returncode != 0:
+                last_error = f"attempt={attempt} returncode={result.returncode} stderr={result.stderr}"
+                continue
+
+            result_json = None
+            for line in result.stdout.strip().split("\n"):
+                line = line.strip()
+                if line.startswith("{"):
+                    try:
+                        record = json.loads(line)
+                        if record.get("type") == "ArchiveResult":
+                            result_json = record
+                            break
+                    except json.JSONDecodeError:
+                        pass
+
+            if not result_json or result_json.get("status") != "succeeded":
+                last_error = f"attempt={attempt} invalid ArchiveResult stdout={result.stdout} stderr={result.stderr}"
+                continue
+
+            output_str = (result_json.get("output_str") or "").strip()
+            if not output_str:
+                last_error = f"attempt={attempt} empty output_str stdout={result.stdout} stderr={result.stderr}"
+                continue
+
+            output_path = Path(output_str)
+            if not output_path.is_file():
+                last_error = f"attempt={attempt} output missing path={output_path}"
+                continue
+
+            if output_path.suffix.lower() not in (
+                ".jpg",
+                ".jpeg",
+                ".png",
+                ".gif",
+                ".webp",
+                ".bmp",
+            ):
+                last_error = f"attempt={attempt} output is not image path={output_path}"
+                continue
+
+            if output_path.stat().st_size <= 0:
+                last_error = f"attempt={attempt} output file empty path={output_path}"
+                continue
+
+            # Ensure the extractor really downloaded image media, not just metadata.
+            output_files = list(tmpdir.rglob("*"))
+            image_files = [
+                f
+                for f in output_files
+                if f.is_file()
+                and f.suffix.lower()
+                in (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
+            ]
+            if not image_files:
+                last_error = f"attempt={attempt} no image files under SNAP_DIR={tmpdir}"
+                continue
+
+            print(
+                f"Successfully extracted {len(image_files)} image(s) in {elapsed_time:.2f}s"
+            )
+            return
+
+    pytest.fail(
+        f"Real gallery download did not yield an image after {max_attempts} attempts. Last error: {last_error}"
+    )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/git/on_Crawl__05_git_install.py b/abx_plugins/plugins/git/on_Crawl__05_git_install.py
index 489d539..c313e3b 100755
--- a/abx_plugins/plugins/git/on_Crawl__05_git_install.py
+++ b/abx_plugins/plugins/git/on_Crawl__05_git_install.py
@@ -15,47 +15,48 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
 
 def main():
-    git_enabled = get_env_bool('GIT_ENABLED', True)
+    git_enabled = get_env_bool("GIT_ENABLED", True)
 
     if not git_enabled:
         sys.exit(0)
 
-    output_binary(name='git', binproviders='apt,brew,env')
+    output_binary(name="git", binproviders="apt,brew,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/git/on_Snapshot__05_git.bg.py b/abx_plugins/plugins/git/on_Snapshot__05_git.bg.py
index a75164f..1ca2591 100755
--- a/abx_plugins/plugins/git/on_Snapshot__05_git.bg.py
+++ b/abx_plugins/plugins/git/on_Snapshot__05_git.bg.py
@@ -22,15 +22,17 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'git'
-BIN_NAME = 'git'
-BIN_PROVIDERS = 'apt,brew,env'
+PLUGIN_NAME = "git"
+BIN_NAME = "git"
+BIN_PROVIDERS = "apt,brew,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
@@ -43,7 +45,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -58,12 +60,12 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
 def is_git_url(url: str) -> bool:
     """Check if URL looks like a git repository."""
     git_patterns = [
-        '.git',
-        'github.com',
-        'gitlab.com',
-        'bitbucket.org',
-        'git://',
-        'ssh://git@',
+        ".git",
+        "github.com",
+        "gitlab.com",
+        "bitbucket.org",
+        "git://",
+        "ssh://git@",
     ]
     return any(p in url.lower() for p in git_patterns)
 
@@ -74,9 +76,9 @@ def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
 
     Returns: (success, output_path, error_message)
     """
-    timeout = get_env_int('GIT_TIMEOUT') or get_env_int('TIMEOUT', 120)
-    git_args = get_env_array('GIT_ARGS', ["clone", "--depth=1", "--recursive"])
-    git_args_extra = get_env_array('GIT_ARGS_EXTRA', [])
+    timeout = get_env_int("GIT_TIMEOUT") or get_env_int("TIMEOUT", 120)
+    git_args = get_env_array("GIT_ARGS", ["clone", "--depth=1", "--recursive"])
+    git_args_extra = get_env_array("GIT_ARGS_EXTRA", [])
 
     cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
 
@@ -84,61 +86,65 @@ def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
         result = subprocess.run(cmd, timeout=timeout)
 
         if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
-            return True, OUTPUT_DIR, ''
+            return True, str(OUTPUT_DIR), ""
         else:
-            return False, None, f'git clone failed (exit={result.returncode})'
+            return False, None, f"git clone failed (exit={result.returncode})"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='Git repository URL')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="Git repository URL")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Clone a git repository from a URL."""
 
     output = None
-    status = 'failed'
-    error = ''
+    status = "failed"
+    error = ""
 
     try:
         # Check if URL looks like a git repo
         if not is_git_url(url):
-            print(f'Skipping git clone for non-git URL: {url}', file=sys.stderr)
-            print(json.dumps({
-                'type': 'ArchiveResult',
-                'status': 'skipped',
-                'output_str': 'Not a git URL',
-            }))
+            print(f"Skipping git clone for non-git URL: {url}", file=sys.stderr)
+            print(
+                json.dumps(
+                    {
+                        "type": "ArchiveResult",
+                        "status": "skipped",
+                        "output_str": "Not a git URL",
+                    }
+                )
+            )
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('GIT_BINARY', 'git')
+        binary = get_env("GIT_BINARY", "git")
 
         # Run extraction
         success, output, error = clone_git(url, binary)
-        status = 'succeeded' if success else 'failed'
+        status = "succeeded" if success else "failed"
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
 
     if error:
-        print(f'ERROR: {error}', file=sys.stderr)
+        print(f"ERROR: {error}", file=sys.stderr)
 
     # Output clean JSONL (no RESULT_JSON= prefix)
     result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output or error or "",
     }
     print(json.dumps(result))
 
-    sys.exit(0 if status == 'succeeded' else 1)
+    sys.exit(0 if status == "succeeded" else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/git/tests/test_git.py b/abx_plugins/plugins/git/tests/test_git.py
index c744949..526d9b6 100644
--- a/abx_plugins/plugins/git/tests/test_git.py
+++ b/abx_plugins/plugins/git/tests/test_git.py
@@ -18,52 +18,92 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-GIT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_git.*'), None)
-TEST_URL = 'https://github.com/ArchiveBox/abx-pkg.git'
+_GIT_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_git.*"), None)
+if _GIT_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+GIT_HOOK = _GIT_HOOK
+TEST_URL = "https://github.com/ArchiveBox/abx-pkg.git"
+
 
 def test_hook_script_exists():
     assert GIT_HOOK.exists()
 
+
 def test_verify_deps_with_abx_pkg():
     """Verify git is available via abx-pkg."""
-    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
-
-    git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
+    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
+
+    try:
+        apt_provider = AptProvider()
+        brew_provider = BrewProvider()
+        env_provider = EnvProvider()
+    except Exception as exc:
+        pytest.fail(f"System package providers unavailable in this runtime: {exc}")
+
+    git_binary = Binary(
+        name="git", binproviders=[apt_provider, brew_provider, env_provider]
+    )
     git_loaded = git_binary.load()
 
     assert git_loaded and git_loaded.abspath, "git is required for git plugin tests"
 
+
 def test_reports_missing_git():
     with tempfile.TemporaryDirectory() as tmpdir:
-        env = {'PATH': '/nonexistent'}
+        env = {"PATH": "/nonexistent"}
         result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
-            cwd=tmpdir, capture_output=True, text=True, env=env
+            [
+                sys.executable,
+                str(GIT_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test123",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
         )
         if result.returncode != 0:
             combined = result.stdout + result.stderr
-            assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
+            assert (
+                "DEPENDENCY_NEEDED" in combined
+                or "git" in combined.lower()
+                or "ERROR=" in combined
+            )
+
 
 def test_handles_non_git_url():
-    assert shutil.which('git'), "git binary not available"
+    assert shutil.which("git"), "git binary not available"
 
     with tempfile.TemporaryDirectory() as tmpdir:
         result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30
+            [
+                sys.executable,
+                str(GIT_HOOK),
+                "--url",
+                "https://example.com",
+                "--snapshot-id",
+                "test789",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=30,
         )
         # Should fail or skip for non-git URL
         assert result.returncode in (0, 1)
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
@@ -71,60 +111,78 @@ def test_handles_non_git_url():
 
         if result_json:
             # Should report failure or skip for non-git URL
-            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip: {result_json}"
+            assert result_json["status"] in ["failed", "skipped"], (
+                f"Should fail or skip: {result_json}"
+            )
 
 
 def test_real_git_repo():
     """Test that git can clone a real GitHub repository."""
     import os
 
-    assert shutil.which('git'), "git binary not available"
+    assert shutil.which("git"), "git binary not available"
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         # Use a real but small GitHub repository
-        git_url = 'https://github.com/ArchiveBox/abx-pkg'
+        git_url = "https://github.com/ArchiveBox/abx-pkg"
 
         env = os.environ.copy()
-        env['GIT_TIMEOUT'] = '120'  # Give it time to clone
+        env["GIT_TIMEOUT"] = "120"  # Give it time to clone
+        env["SNAP_DIR"] = str(tmpdir)
+        env["CRAWL_DIR"] = str(tmpdir)
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', git_url, '--snapshot-id', 'testgit'],
+            [
+                sys.executable,
+                str(GIT_HOOK),
+                "--url",
+                git_url,
+                "--snapshot-id",
+                "testgit",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=180
+            timeout=180,
         )
         elapsed_time = time.time() - start_time
 
         # Should succeed
-        assert result.returncode == 0, f"Should clone repository successfully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should clone repository successfully: {result.stderr}"
+        )
 
         # Parse JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json, (
+            f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
+        )
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
-        # Check that the git repo was cloned
-        git_dirs = list(tmpdir.glob('**/.git'))
-        assert len(git_dirs) > 0, f"Should have cloned a git repository. Contents: {list(tmpdir.rglob('*'))}"
+        # Check that the git repo was cloned in the hook's output path.
+        output_path = Path(result_json.get("output_str") or (tmpdir / "git"))
+        git_dirs = list(output_path.glob("**/.git"))
+        assert len(git_dirs) > 0, (
+            f"Should have cloned a git repository. Output path: {output_path}"
+        )
 
         print(f"Successfully cloned repository in {elapsed_time:.2f}s")
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/hashes/on_Snapshot__93_hashes.py b/abx_plugins/plugins/hashes/on_Snapshot__93_hashes.py
index d6d2723..e4505af 100755
--- a/abx_plugins/plugins/hashes/on_Snapshot__93_hashes.py
+++ b/abx_plugins/plugins/hashes/on_Snapshot__93_hashes.py
@@ -24,21 +24,22 @@
 
 
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
+
 def sha256_file(filepath: Path) -> str:
     """Compute SHA256 hash of a file."""
     h = hashlib.sha256()
     try:
-        with open(filepath, 'rb') as f:
+        with open(filepath, "rb") as f:
             while chunk := f.read(65536):
                 h.update(chunk)
         return h.hexdigest()
     except (OSError, PermissionError):
-        return '0' * 64
+        return "0" * 64
 
 
 def sha256_data(data: bytes) -> str:
@@ -46,9 +47,11 @@ def sha256_data(data: bytes) -> str:
     return hashlib.sha256(data).hexdigest()
 
 
-def collect_files(snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None) -> List[Tuple[Path, str, int]]:
+def collect_files(
+    snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None
+) -> List[Tuple[Path, str, int]]:
     """Recursively collect all files in snapshot directory."""
-    exclude_dirs = exclude_dirs or ['hashes', '.git', '__pycache__']
+    exclude_dirs = exclude_dirs or ["hashes", ".git", "__pycache__"]
     files = []
 
     for root, dirs, filenames in os.walk(snapshot_dir):
@@ -72,7 +75,7 @@ def collect_files(snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None)
 def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
     """Build a Merkle tree from a list of leaf hashes."""
     if not file_hashes:
-        return sha256_data(b''), [[]]
+        return sha256_data(b""), [[]]
 
     tree_levels = [file_hashes.copy()]
 
@@ -88,7 +91,7 @@ def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
             else:
                 combined = left + left
 
-            parent_hash = sha256_data(combined.encode('utf-8'))
+            parent_hash = sha256_data(combined.encode("utf-8"))
             next_level.append(parent_hash)
 
         tree_levels.append(next_level)
@@ -105,41 +108,46 @@ def create_hashes(snapshot_dir: Path) -> Dict[str, Any]:
     total_size = sum(size for _, _, size in files)
 
     file_list = [
-        {'path': str(path), 'hash': file_hash, 'size': size}
+        {"path": str(path), "hash": file_hash, "size": size}
         for path, file_hash, size in files
     ]
 
     return {
-        'root_hash': root_hash,
-        'tree_levels': tree_levels,
-        'files': file_list,
-        'metadata': {
-            'timestamp': datetime.now(timezone.utc).isoformat(),
-            'file_count': len(files),
-            'total_size': total_size,
-            'tree_depth': len(tree_levels),
+        "root_hash": root_hash,
+        "tree_levels": tree_levels,
+        "files": file_list,
+        "metadata": {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "file_count": len(files),
+            "total_size": total_size,
+            "tree_depth": len(tree_levels),
         },
     }
 
 
 @click.command()
-@click.option('--url', required=True, help='URL being archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL being archived")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Generate Merkle tree of all archived outputs."""
-    status = 'failed'
+    status = "failed"
     output = None
-    error = ''
+    error = ""
     root_hash = None
     file_count = 0
 
     try:
         # Check if enabled
-        save_hashes = os.getenv('HASHES_ENABLED', 'true').lower() in ('true', '1', 'yes', 'on')
+        save_hashes = os.getenv("HASHES_ENABLED", "true").lower() in (
+            "true",
+            "1",
+            "yes",
+            "on",
+        )
 
         if not save_hashes:
-            status = 'skipped'
-            click.echo(json.dumps({'status': status, 'output': 'HASHES_ENABLED=false'}))
+            status = "skipped"
+            click.echo(json.dumps({"status": status, "output": "HASHES_ENABLED=false"}))
             sys.exit(0)
 
         # Working directory is the extractor output dir (e.g., <snapshot>/hashes/)
@@ -148,41 +156,41 @@ def main(url: str, snapshot_id: str):
         snapshot_dir = output_dir.parent
 
         if not snapshot_dir.exists():
-            raise FileNotFoundError(f'Snapshot directory not found: {snapshot_dir}')
+            raise FileNotFoundError(f"Snapshot directory not found: {snapshot_dir}")
 
         # Ensure output directory exists
         output_dir.mkdir(exist_ok=True)
-        output_path = output_dir / 'hashes.json'
+        output_path = output_dir / "hashes.json"
 
         # Generate Merkle tree
         merkle_data = create_hashes(snapshot_dir)
 
         # Write output
-        with open(output_path, 'w', encoding='utf-8') as f:
+        with open(output_path, "w", encoding="utf-8") as f:
             json.dump(merkle_data, f, indent=2)
 
-        status = 'succeeded'
-        output = 'hashes.json'
-        root_hash = merkle_data['root_hash']
-        file_count = merkle_data['metadata']['file_count']
+        status = "succeeded"
+        output = "hashes.json"
+        root_hash = merkle_data["root_hash"]
+        file_count = merkle_data["metadata"]["file_count"]
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-        click.echo(f'Error: {error}', err=True)
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
+        click.echo(f"Error: {error}", err=True)
 
     # Print JSON result for hook runner
     result = {
-        'status': status,
-        'output': output,
-        'error': error or None,
-        'root_hash': root_hash,
-        'file_count': file_count,
+        "status": status,
+        "output": output,
+        "error": error or None,
+        "root_hash": root_hash,
+        "file_count": file_count,
     }
     click.echo(json.dumps(result))
 
-    sys.exit(0 if status in ('succeeded', 'skipped') else 1)
+    sys.exit(0 if status in ("succeeded", "skipped") else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/hashes/tests/test_hashes.py b/abx_plugins/plugins/hashes/tests/test_hashes.py
index d10ee1b..bdae153 100644
--- a/abx_plugins/plugins/hashes/tests/test_hashes.py
+++ b/abx_plugins/plugins/hashes/tests/test_hashes.py
@@ -16,7 +16,7 @@
 
 # Get the path to the hashes hook
 PLUGIN_DIR = Path(__file__).parent.parent
-HASHES_HOOK = PLUGIN_DIR / 'on_Snapshot__93_hashes.py'
+HASHES_HOOK = PLUGIN_DIR / "on_Snapshot__93_hashes.py"
 
 
 class TestHashesPlugin:
@@ -30,130 +30,135 @@ def test_hashes_generates_tree_for_files(self):
         """Hashes hook should generate merkle tree for files in snapshot directory."""
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a mock snapshot directory structure
-            snap_dir = Path(temp_dir) / 'snap'
+            snap_dir = Path(temp_dir) / "snap"
             snap_dir.mkdir(parents=True, exist_ok=True)
 
             # Create output directory for hashes
-            output_dir = snap_dir / 'hashes'
+            output_dir = snap_dir / "hashes"
             output_dir.mkdir()
 
             # Create some test files
-            (snap_dir / 'index.html').write_text('<html><body>Test</body></html>')
-            (snap_dir / 'screenshot.png').write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 100)
+            (snap_dir / "index.html").write_text("<html><body>Test</body></html>")
+            (snap_dir / "screenshot.png").write_bytes(
+                b"\x89PNG\r\n\x1a\n" + b"\x00" * 100
+            )
 
-            subdir = snap_dir / 'media'
+            subdir = snap_dir / "media"
             subdir.mkdir()
-            (subdir / 'video.mp4').write_bytes(b'\x00\x00\x00\x18ftypmp42')
+            (subdir / "video.mp4").write_bytes(b"\x00\x00\x00\x18ftypmp42")
 
             # Run the hook from the output directory
             env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'true'
-            env['SNAP_DIR'] = str(snap_dir)
+            env["HASHES_ENABLED"] = "true"
+            env["SNAP_DIR"] = str(snap_dir)
 
             result = subprocess.run(
                 [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
+                    sys.executable,
+                    str(HASHES_HOOK),
+                    "--url=https://example.com",
+                    "--snapshot-id=test-snapshot",
                 ],
                 capture_output=True,
                 text=True,
                 cwd=str(output_dir),  # Hook expects to run from output dir
                 env=env,
-                timeout=30
+                timeout=30,
             )
 
             # Should succeed
             assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
             # Check output file exists
-            output_file = output_dir / 'hashes.json'
+            output_file = output_dir / "hashes.json"
             assert output_file.exists(), "hashes.json not created"
 
             # Parse and verify output
             with open(output_file) as f:
                 data = json.load(f)
 
-            assert 'root_hash' in data
-            assert 'files' in data
-            assert 'metadata' in data
+            assert "root_hash" in data
+            assert "files" in data
+            assert "metadata" in data
 
             # Should have indexed our test files
-            file_paths = [f['path'] for f in data['files']]
-            assert 'index.html' in file_paths
-            assert 'screenshot.png' in file_paths
+            file_paths = [f["path"] for f in data["files"]]
+            assert "index.html" in file_paths
+            assert "screenshot.png" in file_paths
 
             # Verify metadata
-            assert data['metadata']['file_count'] > 0
-            assert data['metadata']['total_size'] > 0
+            assert data["metadata"]["file_count"] > 0
+            assert data["metadata"]["total_size"] > 0
 
     def test_hashes_skips_when_disabled(self):
         """Hashes hook should skip when HASHES_ENABLED=false."""
         with tempfile.TemporaryDirectory() as temp_dir:
-            snap_dir = Path(temp_dir) / 'snap'
+            snap_dir = Path(temp_dir) / "snap"
             snap_dir.mkdir(parents=True, exist_ok=True)
-            output_dir = snap_dir / 'hashes'
+            output_dir = snap_dir / "hashes"
             output_dir.mkdir()
 
             env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'false'
-            env['SNAP_DIR'] = str(snap_dir)
+            env["HASHES_ENABLED"] = "false"
+            env["SNAP_DIR"] = str(snap_dir)
 
             result = subprocess.run(
                 [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
+                    sys.executable,
+                    str(HASHES_HOOK),
+                    "--url=https://example.com",
+                    "--snapshot-id=test-snapshot",
                 ],
                 capture_output=True,
                 text=True,
                 cwd=str(output_dir),
                 env=env,
-                timeout=30
+                timeout=30,
             )
 
             # Should succeed (exit 0) but skip
             assert result.returncode == 0
-            assert 'skipped' in result.stdout
+            assert "skipped" in result.stdout
 
     def test_hashes_handles_empty_directory(self):
         """Hashes hook should handle empty snapshot directory."""
         with tempfile.TemporaryDirectory() as temp_dir:
-            snap_dir = Path(temp_dir) / 'snap'
+            snap_dir = Path(temp_dir) / "snap"
             snap_dir.mkdir(parents=True, exist_ok=True)
-            output_dir = snap_dir / 'hashes'
+            output_dir = snap_dir / "hashes"
             output_dir.mkdir()
 
             env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'true'
-            env['SNAP_DIR'] = str(snap_dir)
+            env["HASHES_ENABLED"] = "true"
+            env["SNAP_DIR"] = str(snap_dir)
 
             result = subprocess.run(
                 [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
+                    sys.executable,
+                    str(HASHES_HOOK),
+                    "--url=https://example.com",
+                    "--snapshot-id=test-snapshot",
                 ],
                 capture_output=True,
                 text=True,
                 cwd=str(output_dir),
                 env=env,
-                timeout=30
+                timeout=30,
             )
 
             # Should succeed even with empty directory
             assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
             # Check output file exists
-            output_file = output_dir / 'hashes.json'
+            output_file = output_dir / "hashes.json"
             assert output_file.exists()
 
             with open(output_file) as f:
                 data = json.load(f)
 
             # Should have empty file list
-            assert data['metadata']['file_count'] == 0
+            assert data["metadata"]["file_count"] == 0
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/headers/tests/conftest.py b/abx_plugins/plugins/headers/tests/conftest.py
new file mode 100644
index 0000000..87b3198
--- /dev/null
+++ b/abx_plugins/plugins/headers/tests/conftest.py
@@ -0,0 +1,12 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def require_chrome_runtime():
+    """Require chrome runtime prerequisites for integration tests."""
+    from abx_pkg import NpmProvider
+
+    try:
+        NpmProvider()
+    except Exception as exc:
+        pytest.fail(f"Chrome integration prerequisites unavailable: {exc}")
diff --git a/abx_plugins/plugins/headers/tests/test_headers.py b/abx_plugins/plugins/headers/tests/test_headers.py
index 06e033b..73ae865 100644
--- a/abx_plugins/plugins/headers/tests/test_headers.py
+++ b/abx_plugins/plugins/headers/tests/test_headers.py
@@ -2,16 +2,14 @@
 Integration tests for headers plugin
 
 Tests verify:
-    pass
 1. Plugin script exists and is executable
 2. Node.js is available
-3. Headers extraction works for real example.com
+3. Headers extraction works for deterministic local URLs
 4. Output JSON contains actual HTTP headers
 5. Config options work (TIMEOUT, USER_AGENT)
 """
 
 import json
-import shutil
 import subprocess
 import tempfile
 import time
@@ -19,6 +17,8 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     CHROME_NAVIGATE_HOOK,
     get_test_env,
@@ -26,15 +26,58 @@
 )
 
 PLUGIN_DIR = Path(__file__).parent.parent
-HEADERS_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_headers.*'), None)
-TEST_URL = 'https://example.com'
+_HEADERS_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_headers.*"), None)
+if _HEADERS_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+HEADERS_HOOK = _HEADERS_HOOK
+TEST_URL = "http://headers-test.invalid/"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+
+
+@pytest.fixture
+def headers_test_urls(httpserver):
+    """Serve deterministic pages for headers integration tests."""
+    httpserver.expect_request("/").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+          <head><title>Headers Fixture</title></head>
+          <body><h1>Headers Fixture</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html; charset=utf-8",
+        headers={"Cache-Control": "max-age=60"},
+    )
+    httpserver.expect_request("/404").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+          <head><title>Not Found Fixture</title></head>
+          <body><h1>Not Found</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html; charset=utf-8",
+        status=404,
+    )
+    httpserver.expect_request("/redirect").respond_with_data(
+        "",
+        status=302,
+        headers={"Location": "/"},
+    )
+    return {
+        "base": httpserver.url_for("/"),
+        "not_found": httpserver.url_for("/404"),
+        "redirect": httpserver.url_for("/redirect"),
+    }
+
 
 def normalize_root_url(url: str) -> str:
-    return url.rstrip('/')
+    return url.rstrip("/")
+
 
 def run_headers_capture(headers_dir, snapshot_chrome_dir, env, url, snapshot_id):
     hook_proc = subprocess.Popen(
-        ['node', str(HEADERS_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
+        ["node", str(HEADERS_HOOK), f"--url={url}", f"--snapshot-id={snapshot_id}"],
         cwd=headers_dir,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
@@ -43,7 +86,12 @@ def run_headers_capture(headers_dir, snapshot_chrome_dir, env, url, snapshot_id)
     )
 
     nav_result = subprocess.run(
-        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
+        [
+            "node",
+            str(CHROME_NAVIGATE_HOOK),
+            f"--url={url}",
+            f"--snapshot-id={snapshot_id}",
+        ],
         cwd=snapshot_chrome_dir,
         capture_output=True,
         text=True,
@@ -51,8 +99,9 @@ def run_headers_capture(headers_dir, snapshot_chrome_dir, env, url, snapshot_id)
         env=env,
     )
 
-    headers_file = headers_dir / 'headers.json'
-    for _ in range(60):
+    headers_file = headers_dir / "headers.json"
+    wait_seconds = 60 if nav_result.returncode == 0 else 5
+    for _ in range(wait_seconds):
         if headers_file.exists() and headers_file.stat().st_size > 0:
             break
         time.sleep(1)
@@ -77,50 +126,53 @@ def test_hook_script_exists():
 
 def test_node_is_available():
     """Test that Node.js is available on the system."""
-    result = subprocess.run(
-        ['which', 'node'],
-        capture_output=True,
-        text=True
-    )
-
-    if result.returncode != 0:
-        pass
+    result = subprocess.run(["which", "node"], capture_output=True, text=True)
+    assert result.returncode == 0, f"node not found in PATH: {result.stderr}"
 
     binary_path = result.stdout.strip()
     assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
 
     # Test that node is executable and get version
     result = subprocess.run(
-        ['node', '--version'],
+        ["node", "--version"],
         capture_output=True,
         text=True,
-        timeout=10
-    ,
-            env=get_test_env())
+        timeout=10,
+        env=get_test_env(),
+    )
     assert result.returncode == 0, f"node not executable: {result.stderr}"
-    assert result.stdout.startswith('v'), f"Unexpected node version format: {result.stdout}"
-
+    assert result.stdout.startswith("v"), (
+        f"Unexpected node version format: {result.stdout}"
+    )
 
-def test_extracts_headers_from_example_com():
-    """Test full workflow: extract headers from real example.com."""
 
-    # Check node is available
-    if not shutil.which('node'):
-        pass
+def test_extracts_headers_from_example_com(require_chrome_runtime, headers_test_urls):
+    """Test full workflow: extract headers from deterministic local fixture."""
+    test_url = headers_test_urls["base"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        with chrome_session(
+            tmpdir,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
 
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'test789',
+                test_url,
+                "test789",
             )
 
         hook_code, stdout, stderr, nav_result, headers_file = result
@@ -129,72 +181,93 @@ def test_extracts_headers_from_example_com():
 
         # Parse clean JSONL output
         result_json = None
-        for line in stdout.strip().split('\n'):
+        for line in stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify output file exists (hook writes to current directory)
         assert headers_file.exists(), "headers.json not created"
 
-        # Verify headers JSON contains REAL example.com response
+        # Verify headers JSON contains deterministic local response
         headers_data = json.loads(headers_file.read_text())
 
-        assert 'url' in headers_data, "Should have url field"
-        assert normalize_root_url(headers_data['url']) == normalize_root_url(TEST_URL), f"URL should be {TEST_URL}"
+        assert "url" in headers_data, "Should have url field"
+        assert normalize_root_url(headers_data["url"]) == normalize_root_url(
+            test_url
+        ), f"URL should be {test_url}"
 
-        assert 'status' in headers_data, "Should have status field"
-        assert headers_data['status'] in [200, 301, 302], \
+        assert "status" in headers_data, "Should have status field"
+        assert headers_data["status"] in [200, 301, 302], (
             f"Should have valid HTTP status, got {headers_data['status']}"
+        )
 
-        assert 'request_headers' in headers_data, "Should have request_headers field"
-        assert isinstance(headers_data['request_headers'], dict), "Request headers should be a dict"
+        assert "request_headers" in headers_data, "Should have request_headers field"
+        assert isinstance(headers_data["request_headers"], dict), (
+            "Request headers should be a dict"
+        )
 
-        assert 'response_headers' in headers_data, "Should have response_headers field"
-        assert isinstance(headers_data['response_headers'], dict), "Response headers should be a dict"
-        assert len(headers_data['response_headers']) > 0, "Response headers dict should not be empty"
+        assert "response_headers" in headers_data, "Should have response_headers field"
+        assert isinstance(headers_data["response_headers"], dict), (
+            "Response headers should be a dict"
+        )
+        assert len(headers_data["response_headers"]) > 0, (
+            "Response headers dict should not be empty"
+        )
 
-        assert 'headers' in headers_data, "Should have headers field"
-        assert isinstance(headers_data['headers'], dict), "Headers should be a dict"
+        assert "headers" in headers_data, "Should have headers field"
+        assert isinstance(headers_data["headers"], dict), "Headers should be a dict"
 
         # Verify common HTTP headers are present
-        headers_lower = {k.lower(): v for k, v in headers_data['response_headers'].items()}
-        assert 'content-type' in headers_lower or 'content-length' in headers_lower, \
+        headers_lower = {
+            k.lower(): v for k, v in headers_data["response_headers"].items()
+        }
+        assert "content-type" in headers_lower or "content-length" in headers_lower, (
             "Should have at least one common HTTP header"
+        )
 
-        assert headers_data['response_headers'].get(':status') == str(headers_data['status']), \
-            "Response headers should include :status pseudo header"
+        assert headers_data["response_headers"].get(":status") == str(
+            headers_data["status"]
+        ), "Response headers should include :status pseudo header"
 
 
-def test_headers_output_structure():
+def test_headers_output_structure(require_chrome_runtime, headers_test_urls):
     """Test that headers plugin produces correctly structured output."""
-
-    if not shutil.which('node'):
-        pass
+    test_url = headers_test_urls["base"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        with chrome_session(
+            tmpdir,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
 
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'testformat',
+                test_url,
+                "testformat",
             )
 
         hook_code, stdout, stderr, nav_result, headers_file = result
@@ -203,20 +276,20 @@ def test_headers_output_structure():
 
         # Parse clean JSONL output
         result_json = None
-        for line in stdout.strip().split('\n'):
+        for line in stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify output structure
         assert headers_file.exists(), "Output headers.json not created"
@@ -224,71 +297,84 @@ def test_headers_output_structure():
         output_data = json.loads(headers_file.read_text())
 
         # Verify all required fields are present
-        assert 'url' in output_data, "Output should have url field"
-        assert 'status' in output_data, "Output should have status field"
-        assert 'request_headers' in output_data, "Output should have request_headers field"
-        assert 'response_headers' in output_data, "Output should have response_headers field"
-        assert 'headers' in output_data, "Output should have headers field"
+        assert "url" in output_data, "Output should have url field"
+        assert "status" in output_data, "Output should have status field"
+        assert "request_headers" in output_data, (
+            "Output should have request_headers field"
+        )
+        assert "response_headers" in output_data, (
+            "Output should have response_headers field"
+        )
+        assert "headers" in output_data, "Output should have headers field"
 
         # Verify data types
-        assert isinstance(output_data['status'], int), "Status should be integer"
-        assert isinstance(output_data['request_headers'], dict), "Request headers should be dict"
-        assert isinstance(output_data['response_headers'], dict), "Response headers should be dict"
-        assert isinstance(output_data['headers'], dict), "Headers should be dict"
+        assert isinstance(output_data["status"], int), "Status should be integer"
+        assert isinstance(output_data["request_headers"], dict), (
+            "Request headers should be dict"
+        )
+        assert isinstance(output_data["response_headers"], dict), (
+            "Response headers should be dict"
+        )
+        assert isinstance(output_data["headers"], dict), "Headers should be dict"
 
-        # Verify example.com returns expected headers
-        assert normalize_root_url(output_data['url']) == normalize_root_url(TEST_URL)
-        assert output_data['status'] in [200, 301, 302]
+        # Verify local fixture returns expected headers
+        assert normalize_root_url(output_data["url"]) == normalize_root_url(test_url)
+        assert output_data["status"] == 200
 
 
 def test_fails_without_chrome_session():
     """Test that headers plugin fails when chrome session is missing."""
 
-    if not shutil.which('node'):
-        pass
-
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         # Run headers extraction
         result = subprocess.run(
-            ['node', str(HEADERS_HOOK), f'--url={TEST_URL}', '--snapshot-id=testhttp'],
+            ["node", str(HEADERS_HOOK), f"--url={TEST_URL}", "--snapshot-id=testhttp"],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=60
-        ,
-            env=get_test_env())
+            timeout=60,
+            env=get_test_env(),
+        )
 
         assert result.returncode != 0, "Should fail without chrome session"
-        assert 'No Chrome session found (chrome plugin must run first)' in (result.stdout + result.stderr)
+        combined_output = result.stdout + result.stderr
+        assert (
+            "No Chrome session found (chrome plugin must run first)" in combined_output
+            or "Cannot find module 'puppeteer-core'" in combined_output
+        ), f"Unexpected error output: {combined_output}"
 
 
-def test_config_timeout_honored():
+def test_config_timeout_honored(require_chrome_runtime, headers_test_urls):
     """Test that TIMEOUT config is respected."""
-
-    if not shutil.which('node'):
-        pass
+    test_url = headers_test_urls["base"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        # Set very short timeout (but example.com should still succeed)
-        import os
-        env_override = os.environ.copy()
-        env_override['TIMEOUT'] = '5'
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        # Set very short timeout (fixture should still succeed)
+        with chrome_session(
+            tmpdir,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
-            env.update(env_override)
+            env["TIMEOUT"] = "5"
 
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'testtimeout',
+                test_url,
+                "testtimeout",
             )
 
         # Should complete (success or fail, but not hang)
@@ -297,113 +383,138 @@ def test_config_timeout_honored():
         assert hook_code in (0, 1), "Should complete without hanging"
 
 
-def test_config_user_agent():
+def test_config_user_agent(require_chrome_runtime, headers_test_urls):
     """Test that USER_AGENT config is used."""
-
-    if not shutil.which('node'):
-        pass
+    test_url = headers_test_urls["base"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        # Set custom user agent
-        import os
-        env_override = os.environ.copy()
-        env_override['USER_AGENT'] = 'TestBot/1.0'
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        with chrome_session(
+            tmpdir,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
-            env.update(env_override)
+            env["USER_AGENT"] = "TestBot/1.0"
 
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'testua',
+                test_url,
+                "testua",
             )
 
-        # Should succeed (example.com doesn't block)
+        # Should succeed on fixture page
         hook_code, stdout, _stderr, nav_result, _headers_file = result
         assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
         if hook_code == 0:
             # Parse clean JSONL output
             result_json = None
-            for line in stdout.strip().split('\n'):
+            for line in stdout.strip().split("\n"):
                 line = line.strip()
-                if line.startswith('{'):
+                if line.startswith("{"):
                     pass
                     try:
                         record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
+                        if record.get("type") == "ArchiveResult":
                             result_json = record
                             break
                     except json.JSONDecodeError:
                         pass
 
             assert result_json, "Should have ArchiveResult JSONL output"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
+            assert result_json["status"] == "succeeded", (
+                f"Should succeed: {result_json}"
+            )
 
-def test_handles_https_urls():
-    """Test that HTTPS URLs work correctly."""
 
-    if not shutil.which('node'):
-        pass
+def test_handles_https_urls(require_chrome_runtime, chrome_test_https_url):
+    """Test HTTPS behavior deterministically (success or explicit cert failure)."""
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url='https://example.org', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        with chrome_session(
+            tmpdir,
+            test_url=chrome_test_https_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                'https://example.org',
-                'testhttps',
+                chrome_test_https_url,
+                "testhttps",
             )
 
         hook_code, _stdout, _stderr, nav_result, headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        if hook_code == 0:
-            if headers_file.exists():
-                output_data = json.loads(headers_file.read_text())
-                assert normalize_root_url(output_data['url']) == normalize_root_url('https://example.org')
-                assert output_data['status'] in [200, 301, 302]
+        if nav_result.returncode == 0:
+            assert hook_code == 0, (
+                "Headers hook should succeed after successful HTTPS navigation"
+            )
+            assert headers_file.exists(), "headers.json not created for HTTPS page"
+            output_data = json.loads(headers_file.read_text())
+            assert normalize_root_url(output_data["url"]) == normalize_root_url(
+                chrome_test_https_url
+            )
+            assert output_data["status"] == 200
+        else:
+            nav_output = (nav_result.stdout + nav_result.stderr).lower()
+            assert "err_cert" in nav_output or "certificate" in nav_output, (
+                f"Expected TLS/certificate navigation error, got: {nav_result.stderr}"
+            )
+            assert hook_code in (0, 1), (
+                "Hook must terminate cleanly when HTTPS navigation fails"
+            )
 
 
-def test_handles_404_gracefully():
+def test_handles_404_gracefully(require_chrome_runtime, headers_test_urls):
     """Test that headers plugin handles 404s gracefully."""
-
-    if not shutil.which('node'):
-        pass
+    not_found_url = headers_test_urls["not_found"]
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url='https://example.com/nonexistent-page-404', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
+        with chrome_session(
+            tmpdir,
+            test_url=not_found_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (_process, _pid, snapshot_chrome_dir, env):
+            headers_dir = snapshot_chrome_dir.parent / "headers"
             headers_dir.mkdir(exist_ok=True)
             result = run_headers_capture(
                 headers_dir,
                 snapshot_chrome_dir,
                 env,
-                'https://example.com/nonexistent-page-404',
-                'test404',
+                not_found_url,
+                "test404",
             )
 
-        # May succeed or fail depending on server behavior
-        # If it succeeds, verify 404 status is captured
         hook_code, _stdout, _stderr, nav_result, headers_file = result
         assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        if hook_code == 0:
-            if headers_file.exists():
-                output_data = json.loads(headers_file.read_text())
-                assert output_data['status'] == 404, "Should capture 404 status"
+        assert hook_code == 0, "Headers hook should succeed for HTTP 404 responses"
+        assert headers_file.exists(), "headers.json not created"
+        output_data = json.loads(headers_file.read_text())
+        assert output_data["status"] == 404, "Should capture 404 status"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/htmltotext/on_Snapshot__58_htmltotext.py b/abx_plugins/plugins/htmltotext/on_Snapshot__58_htmltotext.py
index 9ff8fbe..c41eab3 100755
--- a/abx_plugins/plugins/htmltotext/on_Snapshot__58_htmltotext.py
+++ b/abx_plugins/plugins/htmltotext/on_Snapshot__58_htmltotext.py
@@ -23,13 +23,13 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'htmltotext'
+PLUGIN_NAME = "htmltotext"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-OUTPUT_FILE = 'htmltotext.txt'
+OUTPUT_FILE = "htmltotext.txt"
 
 
 class HTMLTextExtractor(HTMLParser):
@@ -38,7 +38,7 @@ class HTMLTextExtractor(HTMLParser):
     def __init__(self):
         super().__init__()
         self.result = []
-        self.skip_tags = {'script', 'style', 'head', 'meta', 'link', 'noscript'}
+        self.skip_tags = {"script", "style", "head", "meta", "link", "noscript"}
         self.current_tag = None
 
     def handle_starttag(self, tag, attrs):
@@ -54,7 +54,7 @@ def handle_data(self, data):
                 self.result.append(text)
 
     def get_text(self) -> str:
-        return ' '.join(self.result)
+        return " ".join(self.result)
 
 
 def html_to_text(html: str) -> str:
@@ -65,10 +65,14 @@ def html_to_text(html: str) -> str:
         return parser.get_text()
     except Exception:
         # Fallback: strip HTML tags with regex
-        text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'<[^>]+>', ' ', text)
-        text = re.sub(r'\s+', ' ', text)
+        text = re.sub(
+            r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE
+        )
+        text = re.sub(
+            r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL | re.IGNORECASE
+        )
+        text = re.sub(r"<[^>]+>", " ", text)
+        text = re.sub(r"\s+", " ", text)
         return text.strip()
 
 
@@ -76,18 +80,18 @@ def find_html_source() -> str | None:
     """Find HTML content from other extractors in the snapshot directory."""
     # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories
     search_patterns = [
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
+        "singlefile/singlefile.html",
+        "*_singlefile/singlefile.html",
+        "singlefile/*.html",
+        "*_singlefile/*.html",
+        "dom/output.html",
+        "*_dom/output.html",
+        "dom/*.html",
+        "*_dom/*.html",
+        "wget/**/*.html",
+        "*_wget/**/*.html",
+        "wget/**/*.htm",
+        "*_wget/**/*.htm",
     ]
 
     for base in (Path.cwd(), Path.cwd().parent):
@@ -96,7 +100,7 @@ def find_html_source() -> str | None:
             for match in matches:
                 if match.is_file() and match.stat().st_size > 0:
                     try:
-                        return match.read_text(errors='ignore')
+                        return match.read_text(errors="ignore")
                     except Exception:
                         continue
 
@@ -112,25 +116,25 @@ def extract_htmltotext(url: str) -> tuple[bool, str | None, str]:
     # Find HTML source from other extractors
     html_content = find_html_source()
     if not html_content:
-        return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
+        return False, None, "No HTML source found (run singlefile, dom, or wget first)"
 
     # Convert HTML to text
     text = html_to_text(html_content)
 
     if not text or len(text) < 10:
-        return False, None, 'No meaningful text extracted from HTML'
+        return False, None, "No meaningful text extracted from HTML"
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
     output_path = output_dir / OUTPUT_FILE
-    output_path.write_text(text, encoding='utf-8')
+    output_path.write_text(text, encoding="utf-8")
 
-    return True, str(output_path), ''
+    return True, str(output_path), ""
 
 
 @click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL that was archived")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Convert HTML to plain text for search indexing."""
 
@@ -141,22 +145,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/htmltotext/tests/test_htmltotext.py b/abx_plugins/plugins/htmltotext/tests/test_htmltotext.py
index b284e71..ca8e33a 100644
--- a/abx_plugins/plugins/htmltotext/tests/test_htmltotext.py
+++ b/abx_plugins/plugins/htmltotext/tests/test_htmltotext.py
@@ -13,81 +13,115 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-HTMLTOTEXT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_htmltotext.*'), None)
-TEST_URL = 'https://example.com'
+_HTMLTOTEXT_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_htmltotext.*"), None)
+if _HTMLTOTEXT_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+HTMLTOTEXT_HOOK = _HTMLTOTEXT_HOOK
+TEST_URL = "https://example.com"
+
 
 def test_hook_script_exists():
     assert HTMLTOTEXT_HOOK.exists()
 
+
 def test_extracts_text_from_html():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SNAP_DIR"] = str(snap_dir)
         # Create HTML source
-        (snap_dir / 'singlefile').mkdir(parents=True, exist_ok=True)
-        (snap_dir / 'singlefile' / 'singlefile.html').write_text('<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>')
+        (snap_dir / "singlefile").mkdir(parents=True, exist_ok=True)
+        (snap_dir / "singlefile" / "singlefile.html").write_text(
+            "<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>"
+        )
 
         result = subprocess.run(
-            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30, env=env
+            [
+                sys.executable,
+                str(HTMLTOTEXT_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test789",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env,
         )
 
         assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify output file (hook writes to current directory)
-        output_file = snap_dir / 'htmltotext' / 'htmltotext.txt'
-        assert output_file.exists(), f"htmltotext.txt not created. Files: {list(snap_dir.rglob('*'))}"
+        output_file = snap_dir / "htmltotext" / "htmltotext.txt"
+        assert output_file.exists(), (
+            f"htmltotext.txt not created. Files: {list(snap_dir.rglob('*'))}"
+        )
         content = output_file.read_text()
         assert len(content) > 0, "Content should not be empty"
-        assert 'Example Domain' in content, "Should contain text from HTML"
+        assert "Example Domain" in content, "Should contain text from HTML"
+
 
 def test_fails_gracefully_without_html():
     with tempfile.TemporaryDirectory() as tmpdir:
-        snap_dir = Path(tmpdir) / 'snap'
+        snap_dir = Path(tmpdir) / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SNAP_DIR"] = str(snap_dir)
         result = subprocess.run(
-            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30, env=env
+            [
+                sys.executable,
+                str(HTMLTOTEXT_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=30,
+            env=env,
         )
 
         # Should exit with non-zero or emit failure JSONL
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         if result_json:
-            # Should report failure or skip since no HTML source
-            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip without HTML: {result_json}"
+            assert result_json["status"] == "failed", (
+                f"Should fail without HTML source: {result_json}"
+            )
+
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js b/abx_plugins/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js
index 50d63cf..d692d05 100755
--- a/abx_plugins/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js
+++ b/abx_plugins/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js
@@ -38,6 +38,10 @@ const {
     getEnv,
     getEnvBool,
     getEnvInt,
+    parseArgs,
+    readCdpUrl,
+    connectToPage,
+    waitForPageLoaded,
 } = require('../chrome/chrome_utils.js');
 
 // Check if infiniscroll is enabled BEFORE requiring puppeteer
@@ -49,48 +53,7 @@ if (!getEnvBool('INFINISCROLL_ENABLED', true)) {
 const puppeteer = require('puppeteer-core');
 
 const PLUGIN_NAME = 'infiniscroll';
-const CHROME_SESSION_DIR = path.join(SNAP_DIR, 'chrome');
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-function getPageId() {
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    if (fs.existsSync(targetIdFile)) {
-        return fs.readFileSync(targetIdFile, 'utf8').trim();
-    }
-    return null;
-}
-
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-    return false;
-}
+const CHROME_SESSION_DIR = '../chrome';
 
 function sleep(ms) {
     return new Promise(resolve => setTimeout(resolve, ms));
@@ -337,40 +300,24 @@ async function main() {
     const minHeight = getEnvInt('INFINISCROLL_MIN_HEIGHT', 16000);
     const expandDetailsEnabled = getEnvBool('INFINISCROLL_EXPAND_DETAILS', true);
 
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        console.error(CHROME_SESSION_REQUIRED_ERROR);
-        process.exit(1);
-    }
-
-    // Wait for page to be loaded
-    const pageLoaded = await waitForChromeTabLoaded(60000);
-    if (!pageLoaded) {
-        console.error('ERROR: Page not loaded after 60s (chrome_navigate must complete first)');
-        process.exit(1);
-    }
-
     let browser = null;
     try {
-        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        const pages = await browser.pages();
-        if (pages.length === 0) {
-            throw new Error('No pages found in browser');
+        if (!readCdpUrl(CHROME_SESSION_DIR)) {
+            throw new Error('No Chrome session found (chrome plugin must run first)');
         }
 
-        // Find the right page by target ID
-        const targetId = getPageId();
-        let page = null;
-        if (targetId) {
-            page = pages.find(p => {
-                const target = p.target();
-                return target && target._targetId === targetId;
-            });
-        }
-        if (!page) {
-            page = pages[pages.length - 1];
-        }
+        const connectTimeoutMs = Math.min(
+            timeout,
+            getEnvInt('TIMEOUT', 30) * 1000
+        );
+        const connection = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs: connectTimeoutMs,
+            puppeteer,
+        });
+        browser = connection.browser;
+        const page = connection.page;
+        await waitForPageLoaded(CHROME_SESSION_DIR, connectTimeoutMs * 4, 200);
 
         console.error(`Starting infinite scroll on ${url}`);
 
diff --git a/abx_plugins/plugins/infiniscroll/tests/test_infiniscroll.py b/abx_plugins/plugins/infiniscroll/tests/test_infiniscroll.py
index 89673eb..17eeb15 100644
--- a/abx_plugins/plugins/infiniscroll/tests/test_infiniscroll.py
+++ b/abx_plugins/plugins/infiniscroll/tests/test_infiniscroll.py
@@ -12,7 +12,6 @@
 """
 
 import json
-import os
 import re
 import subprocess
 import time
@@ -21,6 +20,8 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 # Import shared Chrome test helpers
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
@@ -29,8 +30,89 @@
 
 
 PLUGIN_DIR = Path(__file__).parent.parent
-INFINISCROLL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_infiniscroll.*'), None)
-TEST_URL = 'https://www.singsing.movie/'
+INFINISCROLL_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_infiniscroll.*"), None)
+TEST_URL = "https://example.com/"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+INFINISCROLL_TEST_PAGE_HTML = """
+<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8" />
+  <title>Infinite Scroll Test Page</title>
+  <style>
+    body { margin: 0; font-family: sans-serif; }
+    #feed { max-width: 860px; margin: 0 auto; padding: 12px; }
+    .card {
+      margin: 12px 0;
+      padding: 16px;
+      min-height: 220px;
+      border: 1px solid #ddd;
+      border-radius: 8px;
+      background: #f8f8f8;
+    }
+    #status {
+      position: fixed;
+      top: 0;
+      right: 0;
+      background: #111;
+      color: #fff;
+      padding: 8px 10px;
+      font-size: 12px;
+      border-bottom-left-radius: 8px;
+    }
+  </style>
+</head>
+<body>
+  <div id="status">loads: 0</div>
+  <main id="feed"></main>
+  <script>
+    const feed = document.getElementById('feed');
+    const status = document.getElementById('status');
+    let loadCount = 0;
+    const maxLoads = 5;
+    let inFlight = false;
+
+    function addCards(prefix, count) {
+      for (let i = 0; i < count; i++) {
+        const card = document.createElement('article');
+        card.className = 'card';
+        card.textContent = `${prefix} item ${i + 1}`;
+        feed.appendChild(card);
+      }
+      status.textContent = `loads: ${loadCount}`;
+    }
+
+    function maybeLoadMore() {
+      if (inFlight || loadCount >= maxLoads) return;
+      const nearBottom = window.innerHeight + window.scrollY >= document.body.scrollHeight - 120;
+      if (!nearBottom) return;
+
+      inFlight = true;
+      const nextLoad = loadCount + 1;
+      setTimeout(() => {
+        loadCount = nextLoad;
+        addCards(`batch-${loadCount}`, 8);
+        inFlight = false;
+      }, 120);
+    }
+
+    addCards('initial', 8);
+    window.addEventListener('scroll', maybeLoadMore, { passive: true });
+    window.addEventListener('load', maybeLoadMore);
+  </script>
+</body>
+</html>
+""".strip()
+
+
+@pytest.fixture
+def infiniscroll_test_url(httpserver):
+    """Serve a deterministic page that appends DOM content while scrolling."""
+    httpserver.expect_request("/").respond_with_data(
+        INFINISCROLL_TEST_PAGE_HTML,
+        content_type="text/html",
+    )
+    return httpserver.url_for("/")
 
 
 def test_hook_script_exists():
@@ -41,208 +123,267 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
+    from abx_pkg import Binary, EnvProvider
 
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for infiniscroll plugin"
+    assert node_loaded and node_loaded.abspath, (
+        "Node.js required for infiniscroll plugin"
+    )
 
 
 def test_config_infiniscroll_disabled_skips():
     """Test that INFINISCROLL_ENABLED=False exits without emitting JSONL."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['INFINISCROLL_ENABLED'] = 'False'
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["INFINISCROLL_ENABLED"] = "False"
 
         result = subprocess.run(
-            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
+            [
+                "node",
+                str(INFINISCROLL_HOOK),
+                f"--url={TEST_URL}",
+                "--snapshot-id=test-disabled",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
+        )
 
 
 def test_fails_gracefully_without_chrome_session():
     """Test that hook fails gracefully when no chrome session exists."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
-        infiniscroll_dir = snap_dir / 'infiniscroll'
+        snap_dir = tmpdir / "snap"
+        infiniscroll_dir = snap_dir / "infiniscroll"
         infiniscroll_dir.mkdir(parents=True, exist_ok=True)
 
         result = subprocess.run(
-            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
+            [
+                "node",
+                str(INFINISCROLL_HOOK),
+                f"--url={TEST_URL}",
+                "--snapshot-id=test-no-chrome",
+            ],
             cwd=infiniscroll_dir,
             capture_output=True,
             text=True,
-            env=get_test_env() | {'SNAP_DIR': str(snap_dir)},
-            timeout=30
+            env=get_test_env() | {"SNAP_DIR": str(snap_dir)},
+            timeout=30,
         )
 
         # Should fail (exit 1) when no chrome session
         assert result.returncode != 0, "Should fail when no chrome session exists"
         # Error could be about chrome/CDP not found, or puppeteer module missing
         err_lower = result.stderr.lower()
-        assert any(x in err_lower for x in ['chrome', 'cdp', 'puppeteer', 'module']), \
+        assert any(x in err_lower for x in ["chrome", "cdp", "puppeteer", "module"]), (
             f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
+        )
 
 
-def test_scrolls_page_and_outputs_stats():
+def test_scrolls_page_and_outputs_stats(infiniscroll_test_url):
     """Integration test: scroll page and verify JSONL output format."""
     with tempfile.TemporaryDirectory() as tmpdir:
         with chrome_session(
             Path(tmpdir),
-            crawl_id='test-infiniscroll',
-            snapshot_id='snap-infiniscroll',
-            test_url=TEST_URL,
+            crawl_id="test-infiniscroll",
+            snapshot_id="snap-infiniscroll",
+            test_url=infiniscroll_test_url,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
         ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
             # Create infiniscroll output directory (sibling to chrome)
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
+            infiniscroll_dir = snapshot_chrome_dir.parent / "infiniscroll"
             infiniscroll_dir.mkdir()
 
             # Run infiniscroll hook
-            env['INFINISCROLL_SCROLL_LIMIT'] = '3'  # Limit scrolls for faster test
-            env['INFINISCROLL_SCROLL_DELAY'] = '500'  # Faster scrolling
-            env['INFINISCROLL_MIN_HEIGHT'] = '1000'  # Lower threshold for test
+            env["INFINISCROLL_SCROLL_LIMIT"] = "3"  # Limit scrolls for faster test
+            env["INFINISCROLL_SCROLL_DELAY"] = "500"  # Faster scrolling
+            env["INFINISCROLL_MIN_HEIGHT"] = "1000"  # Lower threshold for test
 
             result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
+                [
+                    "node",
+                    str(INFINISCROLL_HOOK),
+                    f"--url={infiniscroll_test_url}",
+                    "--snapshot-id=snap-infiniscroll",
+                ],
                 cwd=str(infiniscroll_dir),
                 capture_output=True,
                 text=True,
                 timeout=60,
-                env=env
+                env=env,
             )
 
-            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}\nStdout: {result.stdout}"
+            assert result.returncode == 0, (
+                f"Infiniscroll failed: {result.stderr}\nStdout: {result.stdout}"
+            )
 
             # Parse JSONL output
             result_json = None
-            for line in result.stdout.strip().split('\n'):
+            for line in result.stdout.strip().split("\n"):
                 line = line.strip()
-                if line.startswith('{'):
+                if line.startswith("{"):
                     try:
                         record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
+                        if record.get("type") == "ArchiveResult":
                             result_json = record
                             break
                     except json.JSONDecodeError:
                         pass
 
-            assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {result.stdout}"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+            assert result_json is not None, (
+                f"Should have ArchiveResult JSONL output. Stdout: {result.stdout}"
+            )
+            assert result_json["status"] == "succeeded", (
+                f"Should succeed: {result_json}"
+            )
 
             # Verify output_str format: "scrolled to X,XXXpx (+Y,YYYpx new content) over Z.Zs"
-            output_str = result_json.get('output_str', '')
-            assert output_str.startswith('scrolled to'), f"output_str should start with 'scrolled to': {output_str}"
-            assert 'px' in output_str, f"output_str should contain pixel count: {output_str}"
-            assert re.search(r'over \d+(\.\d+)?s', output_str), f"output_str should contain duration: {output_str}"
+            output_str = result_json.get("output_str", "")
+            assert output_str.startswith("scrolled to"), (
+                f"output_str should start with 'scrolled to': {output_str}"
+            )
+            assert "px" in output_str, (
+                f"output_str should contain pixel count: {output_str}"
+            )
+            assert re.search(r"over \d+(\.\d+)?s", output_str), (
+                f"output_str should contain duration: {output_str}"
+            )
 
             # Verify no files created in output directory
             output_files = list(infiniscroll_dir.iterdir())
-            assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
+            assert len(output_files) == 0, (
+                f"Should not create any files, but found: {output_files}"
+            )
 
 
-def test_config_scroll_limit_honored():
+def test_config_scroll_limit_honored(infiniscroll_test_url):
     """Test that INFINISCROLL_SCROLL_LIMIT config is respected."""
     with tempfile.TemporaryDirectory() as tmpdir:
         with chrome_session(
             Path(tmpdir),
-            crawl_id='test-scroll-limit',
-            snapshot_id='snap-limit',
-            test_url=TEST_URL,
+            crawl_id="test-scroll-limit",
+            snapshot_id="snap-limit",
+            test_url=infiniscroll_test_url,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
         ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
+            infiniscroll_dir = snapshot_chrome_dir.parent / "infiniscroll"
             infiniscroll_dir.mkdir()
 
             # Set scroll limit to 2 (use env from setup_chrome_session)
-            env['INFINISCROLL_SCROLL_LIMIT'] = '2'
-            env['INFINISCROLL_SCROLL_DELAY'] = '500'
-            env['INFINISCROLL_MIN_HEIGHT'] = '100000'  # High threshold so limit kicks in
+            env["INFINISCROLL_SCROLL_LIMIT"] = "2"
+            env["INFINISCROLL_SCROLL_DELAY"] = "500"
+            env["INFINISCROLL_MIN_HEIGHT"] = (
+                "100000"  # High threshold so limit kicks in
+            )
 
             result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-limit'],
+                [
+                    "node",
+                    str(INFINISCROLL_HOOK),
+                    f"--url={infiniscroll_test_url}",
+                    "--snapshot-id=snap-limit",
+                ],
                 cwd=str(infiniscroll_dir),
                 capture_output=True,
                 text=True,
                 timeout=60,
-                env=env
+                env=env,
             )
 
             assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}"
 
             # Parse output and verify scroll count
             result_json = None
-            for line in result.stdout.strip().split('\n'):
-                if line.strip().startswith('{'):
+            for line in result.stdout.strip().split("\n"):
+                if line.strip().startswith("{"):
                     try:
                         record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
+                        if record.get("type") == "ArchiveResult":
                             result_json = record
                             break
                     except json.JSONDecodeError:
                         pass
 
             assert result_json is not None, "Should have JSONL output"
-            output_str = result_json.get('output_str', '')
+            output_str = result_json.get("output_str", "")
 
             # Verify output format and that it completed (scroll limit enforced internally)
-            assert output_str.startswith('scrolled to'), f"Should have valid output_str: {output_str}"
-            assert result_json['status'] == 'succeeded', f"Should succeed with scroll limit: {result_json}"
-
+            assert output_str.startswith("scrolled to"), (
+                f"Should have valid output_str: {output_str}"
+            )
+            assert result_json["status"] == "succeeded", (
+                f"Should succeed with scroll limit: {result_json}"
+            )
 
 
-def test_config_timeout_honored():
+def test_config_timeout_honored(infiniscroll_test_url):
     """Test that INFINISCROLL_TIMEOUT config is respected."""
     with tempfile.TemporaryDirectory() as tmpdir:
         with chrome_session(
             Path(tmpdir),
-            crawl_id='test-timeout',
-            snapshot_id='snap-timeout',
-            test_url=TEST_URL,
+            crawl_id="test-timeout",
+            snapshot_id="snap-timeout",
+            test_url=infiniscroll_test_url,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
         ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
+            infiniscroll_dir = snapshot_chrome_dir.parent / "infiniscroll"
             infiniscroll_dir.mkdir()
 
             # Set very short timeout (use env from setup_chrome_session)
-            env['INFINISCROLL_TIMEOUT'] = '3'  # 3 seconds
-            env['INFINISCROLL_SCROLL_DELAY'] = '2000'  # 2s delay - timeout should trigger
-            env['INFINISCROLL_SCROLL_LIMIT'] = '100'  # High limit
-            env['INFINISCROLL_MIN_HEIGHT'] = '100000'
+            env["INFINISCROLL_TIMEOUT"] = "3"  # 3 seconds
+            env["INFINISCROLL_SCROLL_DELAY"] = (
+                "2000"  # 2s delay - timeout should trigger
+            )
+            env["INFINISCROLL_SCROLL_LIMIT"] = "100"  # High limit
+            env["INFINISCROLL_MIN_HEIGHT"] = "100000"
 
             start_time = time.time()
             result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-timeout'],
+                [
+                    "node",
+                    str(INFINISCROLL_HOOK),
+                    f"--url={infiniscroll_test_url}",
+                    "--snapshot-id=snap-timeout",
+                ],
                 cwd=str(infiniscroll_dir),
                 capture_output=True,
                 text=True,
                 timeout=30,
-                env=env
+                env=env,
             )
             elapsed = time.time() - start_time
 
             # Should complete within reasonable time (timeout + buffer)
             assert elapsed < 15, f"Should respect timeout, took {elapsed:.1f}s"
-            assert result.returncode == 0, f"Should complete even with timeout: {result.stderr}"
-
+            assert result.returncode == 0, (
+                f"Should complete even with timeout: {result.stderr}"
+            )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py b/abx_plugins/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
index 9d590a9..ef61876 100644
--- a/abx_plugins/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
+++ b/abx_plugins/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
@@ -6,7 +6,6 @@
 
 import json
 import os
-import signal
 import subprocess
 import tempfile
 import time
@@ -14,18 +13,24 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     setup_test_env,
-    get_test_env,
     launch_chromium_session,
     kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
+    wait_for_extensions_metadata,
 )
 
 
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_istilldontcareaboutcookies_extension.*'), None)
+_INSTALL_SCRIPT = next(
+    PLUGIN_DIR.glob("on_Crawl__*_install_istilldontcareaboutcookies_extension.*"), None
+)
+if _INSTALL_SCRIPT is None:
+    raise FileNotFoundError(f"Install script not found in {PLUGIN_DIR}")
+INSTALL_SCRIPT = _INSTALL_SCRIPT
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
 
 
 def test_install_script_exists():
@@ -40,13 +45,19 @@ def test_extension_metadata():
         env["CHROME_EXTENSIONS_DIR"] = str(Path(tmpdir) / "chrome_extensions")
 
         result = subprocess.run(
-            ["node", "-e", f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))"],
+            [
+                "node",
+                "-e",
+                f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))",
+            ],
             capture_output=True,
             text=True,
-            env=env
+            env=env,
         )
 
-        assert result.returncode == 0, f"Failed to load extension metadata: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Failed to load extension metadata: {result.stderr}"
+        )
 
         metadata = json.loads(result.stdout)
         assert metadata["webstore_id"] == "edibdbjcniadpccecjdfdjjppcpchdlm"
@@ -67,11 +78,15 @@ def test_install_creates_cache():
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Check output mentions installation
-        assert "Installing" in result.stdout or "installed" in result.stdout or "istilldontcareaboutcookies" in result.stdout
+        assert (
+            "Installing" in result.stdout
+            or "installed" in result.stdout
+            or "istilldontcareaboutcookies" in result.stdout
+        )
 
         # Check cache file was created
         cache_file = ext_dir / "istilldontcareaboutcookies.extension.json"
@@ -90,7 +105,9 @@ def test_install_uses_existing_cache():
         ext_dir.mkdir(parents=True)
 
         # Create fake cache
-        fake_extension_dir = ext_dir / "edibdbjcniadpccecjdfdjjppcpchdlm__istilldontcareaboutcookies"
+        fake_extension_dir = (
+            ext_dir / "edibdbjcniadpccecjdfdjjppcpchdlm__istilldontcareaboutcookies"
+        )
         fake_extension_dir.mkdir(parents=True)
 
         manifest = {"version": "1.1.8", "name": "I still don't care about cookies"}
@@ -104,7 +121,7 @@ def test_install_uses_existing_cache():
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should use cache or install successfully
@@ -126,14 +143,25 @@ def test_no_configuration_required():
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Should not require any API keys or configuration
         assert "API" not in (result.stdout + result.stderr) or result.returncode == 0
 
 
-TEST_URL = 'https://www.filmin.es/'
+COOKIE_TEST_PATH = "/cookie-consent-test"
+COOKIE_TEST_HTML_STUB = """<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Cookie Consent Test Fixture</title>
+</head>
+<body>
+  <div class="cky-consent-container cky-popup-center" tabindex="-1" aria-label="Consentimiento de cookies" role="region"><div class="cky-consent-bar" data-cky-tag="notice" style="border-color: #151527; background-color: #151527;"><div class="cky-notice"><p class="cky-title" aria-level="2" data-cky-tag="title" role="heading" style="color: #FFFFFF;">Consentimiento de cookies</p><div class="cky-notice-group"><div class="cky-notice-des" data-cky-tag="description" style="color: #FFFFFF;"><p>En Filmin usamos cookies para el funcionamiento del sitio web, para mejorar y personalizar la experiencia de usuario y para recopilar información sobre las actividades de navegación que nos ayudan a mejorar. Puedes aceptar todo, rechazar o administrar las preferencias de cookies.&nbsp;<a href="https://www.filmin.es/aviso-legal#politica-de-cookies" class="cky-policy" aria-label="Aviso legal" target="_blank" rel="noopener" style="color: #FFFFFF; border-color: transparent; background-color: transparent;" data-cky-tag="readmore-button">Aviso legal</a></p></div><div class="cky-notice-btn-wrapper" data-cky-tag="notice-buttons"><button class="cky-btn cky-btn-customize" aria-label="Preferencias" aria-haspopup="dialog" aria-controls="ckyPreferenceCenter" data-cky-tag="settings-button" style="color: #FFFFFF; border-color: #FFFFFF; background-color: RGBA(0, 0, 0, 0);">Preferencias</button> <button class="cky-btn cky-btn-reject" aria-label="Rechazar todo" data-cky-tag="reject-button" style="color: #141426; border-color: #02FFA1; background-color: #02FFA1;">Rechazar todo</button> <button class="cky-btn cky-btn-accept" aria-label="Aceptar todo" data-cky-tag="accept-button" style="color: #141426; border-color: #02FFA1; background-color: #02FFA1;">Aceptar todo</button></div></div></div></div></div>
+</body>
+</html>
+"""
 
 
 def test_extension_loads_in_chromium():
@@ -148,68 +176,66 @@ def test_extension_loads_in_chromium():
 
         # Set up isolated env with proper directory structure
         env = setup_test_env(tmpdir)
-        env.setdefault('CHROME_HEADLESS', 'true')
+        env.setdefault("CHROME_HEADLESS", "true")
 
-        ext_dir = Path(env['CHROME_EXTENSIONS_DIR'])
+        ext_dir = Path(env["CHROME_EXTENSIONS_DIR"])
 
         # Step 1: Install the extension
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             cwd=str(tmpdir),
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=120,
         )
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
         # Verify extension cache was created
-        cache_file = ext_dir / 'istilldontcareaboutcookies.extension.json'
+        cache_file = ext_dir / "istilldontcareaboutcookies.extension.json"
         assert cache_file.exists(), "Extension cache not created"
         ext_data = json.loads(cache_file.read_text())
         print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
 
         # Step 2: Launch Chromium using the chrome hook (loads extensions automatically)
-        crawl_id = 'test-cookies'
-        crawl_dir = Path(env['CRAWL_DIR']) / crawl_id
+        crawl_id = "test-cookies"
+        crawl_dir = Path(env["CRAWL_DIR"]) / crawl_id
         crawl_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir(parents=True, exist_ok=True)
-        env['CRAWL_DIR'] = str(crawl_dir)
-
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
+        env["CRAWL_DIR"] = str(crawl_dir)
 
-        # Wait for Chromium to launch and CDP URL to be available
+        chrome_launch_process = None
         cdp_url = None
-        for i in range(20):
-            if chrome_launch_process.poll() is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-            cdp_file = chrome_dir / 'cdp_url.txt'
-            if cdp_file.exists():
-                cdp_url = cdp_file.read_text().strip()
-                break
-            time.sleep(1)
-
-        assert cdp_url, "Chromium CDP URL not found after 20s"
+        try:
+            chrome_launch_process, cdp_url = launch_chromium_session(
+                env,
+                chrome_dir,
+                crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+            )
+        except Exception as exc:
+            raise RuntimeError(
+                f"Chromium launch failed after waiting up to {CHROME_STARTUP_TIMEOUT_SECONDS}s"
+            ) from exc
+
         print(f"Chromium launched with CDP URL: {cdp_url}")
 
-        # Check that extensions were loaded
-        extensions_file = chrome_dir / 'extensions.json'
-        if extensions_file.exists():
-            loaded_exts = json.loads(extensions_file.read_text())
-            print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
+        loaded_exts = wait_for_extensions_metadata(chrome_dir, timeout_seconds=10)
+        print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
+        ext_entry = next(
+            (e for e in loaded_exts if e.get("name") == "istilldontcareaboutcookies"),
+            None,
+        )
+        assert ext_entry, (
+            f"istilldontcareaboutcookies not present in extensions.json: {loaded_exts}"
+        )
+        ext_id = ext_entry.get("id")
+        assert ext_id, f"Extension id missing from extensions.json entry: {ext_entry}"
 
         try:
             # Step 3: Connect to Chromium and verify extension loaded via options page
-            test_script = f'''
+            test_script = f"""
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
 
@@ -218,38 +244,8 @@ def test_extension_loads_in_chromium():
 
     // Wait for extension to initialize
     await new Promise(r => setTimeout(r, 2000));
-
-    // Find extension targets to get the extension ID
-    const targets = browser.targets();
-    const extTargets = targets.filter(t =>
-        t.url().startsWith('chrome-extension://') ||
-        t.type() === 'service_worker' ||
-        t.type() === 'background_page'
-    );
-
-    // Filter out Chrome's built-in extensions
-    const builtinIds = ['nkeimhogjdpnpccoofpliimaahmaaome', 'fignfifoniblkonapihmkfakmlgkbkcf',
-                       'ahfgeienlihckogmohjhadlkjgocpleb', 'mhjfbmdgcfjbbpaeojofohoefgiehjai'];
-    const customExtTargets = extTargets.filter(t => {{
-        const url = t.url();
-        if (!url.startsWith('chrome-extension://')) return false;
-        const extId = url.split('://')[1].split('/')[0];
-        return !builtinIds.includes(extId);
-    }});
-
-    console.error('Custom extension targets found:', customExtTargets.length);
-    customExtTargets.forEach(t => console.error('  -', t.type(), t.url()));
-
-    if (customExtTargets.length === 0) {{
-        console.log(JSON.stringify({{ loaded: false, error: 'No custom extension targets found' }}));
-        browser.disconnect();
-        return;
-    }}
-
-    // Get the extension ID from the first custom extension target
-    const extUrl = customExtTargets[0].url();
-    const extId = extUrl.split('://')[1].split('/')[0];
-    console.error('Extension ID:', extId);
+    const extId = '{ext_id}';
+    console.error('Extension ID from extensions.json:', extId);
 
     // Try to navigate to the extension's options.html page
     const page = await browser.newPage();
@@ -286,17 +282,17 @@ def test_extension_loads_in_chromium():
 
     browser.disconnect();
 }})();
-'''
-            script_path = tmpdir / 'test_extension.js'
+"""
+            script_path = tmpdir / "test_extension.js"
             script_path.write_text(test_script)
 
             result = subprocess.run(
-                ['node', str(script_path)],
+                ["node", str(script_path)],
                 cwd=str(tmpdir),
                 capture_output=True,
                 text=True,
                 env=env,
-                timeout=90
+                timeout=90,
             )
 
             print(f"stderr: {result.stderr}")
@@ -304,31 +300,27 @@ def test_extension_loads_in_chromium():
 
             assert result.returncode == 0, f"Test failed: {result.stderr}"
 
-            output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
+            output_lines = [
+                line
+                for line in result.stdout.strip().split("\n")
+                if line.startswith("{")
+            ]
             assert output_lines, f"No JSON output: {result.stdout}"
 
             test_result = json.loads(output_lines[-1])
-            assert test_result.get('loaded'), \
+            assert test_result.get("loaded"), (
                 f"Extension should be loaded in Chromium. Result: {test_result}"
+            )
             print(f"Extension loaded successfully: {test_result}")
 
         finally:
-            # Clean up Chromium
-            try:
-                chrome_launch_process.send_signal(signal.SIGTERM)
-                chrome_launch_process.wait(timeout=5)
-            except:
-                pass
-            chrome_pid_file = chrome_dir / 'chrome.pid'
-            if chrome_pid_file.exists():
-                try:
-                    chrome_pid = int(chrome_pid_file.read_text().strip())
-                    os.kill(chrome_pid, signal.SIGKILL)
-                except (OSError, ValueError):
-                    pass
-
-
-def check_cookie_consent_visibility(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
+            if chrome_launch_process:
+                kill_chromium_session(chrome_launch_process, chrome_dir)
+
+
+def check_cookie_consent_visibility(
+    cdp_url: str, test_url: str, env: dict, script_dir: Path
+) -> dict:
     """Check if cookie consent elements are visible on a page.
 
     Returns dict with:
@@ -337,7 +329,7 @@ def check_cookie_consent_visibility(cdp_url: str, test_url: str, env: dict, scri
         - elements_found: list - all cookie-related elements found in DOM
         - html_snippet: str - snippet of the page HTML for debugging
     """
-    test_script = f'''
+    test_script = f"""
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
 
@@ -438,31 +430,35 @@ def check_cookie_consent_visibility(cdp_url: str, test_url: str, env: dict, scri
     browser.disconnect();
     console.log(JSON.stringify(result));
 }})();
-'''
-    script_path = script_dir / 'check_cookies.js'
+"""
+    script_path = script_dir / "check_cookies.js"
     script_path.write_text(test_script)
 
     result = subprocess.run(
-        ['node', str(script_path)],
+        ["node", str(script_path)],
         cwd=str(script_dir),
         capture_output=True,
         text=True,
         env=env,
-        timeout=90
+        timeout=90,
     )
 
     if result.returncode != 0:
         raise RuntimeError(f"Cookie check script failed: {result.stderr}")
 
-    output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
+    output_lines = [
+        line for line in result.stdout.strip().split("\n") if line.startswith("{")
+    ]
     if not output_lines:
-        raise RuntimeError(f"No JSON output from cookie check: {result.stdout}\nstderr: {result.stderr}")
+        raise RuntimeError(
+            f"No JSON output from cookie check: {result.stdout}\nstderr: {result.stderr}"
+        )
 
     return json.loads(output_lines[-1])
 
 
-def test_hides_cookie_consent_on_filmin():
-    """Live test: verify extension hides cookie consent popup on filmin.es.
+def test_hides_cookie_consent_on_static_page(httpserver):
+    """Verify extension hides cookie consent popup on a deterministic local page.
 
     This test runs TWO browser sessions:
     1. WITHOUT extension - verifies cookie consent IS visible (baseline)
@@ -471,39 +467,52 @@ def test_hides_cookie_consent_on_filmin():
     This ensures we're actually testing the extension's effect, not just
     that a page happens to not have cookie consent.
     """
+    httpserver.expect_request(COOKIE_TEST_PATH).respond_with_data(
+        COOKIE_TEST_HTML_STUB,
+        content_type="text/html; charset=utf-8",
+    )
+    test_url = httpserver.url_for(COOKIE_TEST_PATH)
+
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         # Set up isolated env with proper directory structure
         env_base = setup_test_env(tmpdir)
-        env_base['CHROME_HEADLESS'] = 'true'
+        env_base["CHROME_HEADLESS"] = "true"
 
-        ext_dir = Path(env_base['CHROME_EXTENSIONS_DIR'])
+        ext_dir = Path(env_base["CHROME_EXTENSIONS_DIR"])
 
         # ============================================================
         # STEP 1: BASELINE - Run WITHOUT extension, verify cookie consent IS visible
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 1: BASELINE TEST (no extension)")
-        print("="*60)
+        print("=" * 60)
 
-        personas_dir = Path(env_base['PERSONAS_DIR'])
+        personas_dir = Path(env_base["PERSONAS_DIR"])
 
         env_no_ext = env_base.copy()
-        env_no_ext['CHROME_EXTENSIONS_DIR'] = str(personas_dir / 'Default' / 'empty_extensions')
-        (personas_dir / 'Default' / 'empty_extensions').mkdir(parents=True, exist_ok=True)
+        env_no_ext["CHROME_EXTENSIONS_DIR"] = str(
+            personas_dir / "Default" / "empty_extensions"
+        )
+        (personas_dir / "Default" / "empty_extensions").mkdir(
+            parents=True, exist_ok=True
+        )
 
         # Launch baseline Chromium in crawls directory
-        baseline_crawl_id = 'baseline-no-ext'
-        baseline_crawl_dir = Path(env_base['CRAWL_DIR']) / baseline_crawl_id
+        baseline_crawl_id = "baseline-no-ext"
+        baseline_crawl_dir = Path(env_base["CRAWL_DIR"]) / baseline_crawl_id
         baseline_crawl_dir.mkdir(parents=True, exist_ok=True)
-        baseline_chrome_dir = baseline_crawl_dir / 'chrome'
-        env_no_ext['CRAWL_DIR'] = str(baseline_crawl_dir)
+        baseline_chrome_dir = baseline_crawl_dir / "chrome"
+        env_no_ext["CRAWL_DIR"] = str(baseline_crawl_dir)
         baseline_process = None
 
         try:
             baseline_process, baseline_cdp_url = launch_chromium_session(
-                env_no_ext, baseline_chrome_dir, baseline_crawl_id
+                env_no_ext,
+                baseline_chrome_dir,
+                baseline_crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             )
             print(f"Baseline Chromium launched: {baseline_cdp_url}")
 
@@ -511,62 +520,70 @@ def test_hides_cookie_consent_on_filmin():
             time.sleep(2)
 
             baseline_result = check_cookie_consent_visibility(
-                baseline_cdp_url, TEST_URL, env_no_ext, tmpdir
+                baseline_cdp_url, test_url, env_no_ext, tmpdir
             )
 
-            print(f"Baseline result: visible={baseline_result['visible']}, "
-                  f"elements_found={len(baseline_result['elements_found'])}")
+            print(
+                f"Baseline result: visible={baseline_result['visible']}, "
+                f"elements_found={len(baseline_result['elements_found'])}"
+            )
 
-            if baseline_result['elements_found']:
+            if baseline_result["elements_found"]:
                 print("Elements found in baseline:")
-                for el in baseline_result['elements_found'][:5]:  # Show first 5
-                    print(f"  - {el['selector']}: visible={el['visible']}, "
-                          f"display={el['display']}, size={el['width']}x{el['height']}")
+                for el in baseline_result["elements_found"][:5]:  # Show first 5
+                    print(
+                        f"  - {el['selector']}: visible={el['visible']}, "
+                        f"display={el['display']}, size={el['width']}x{el['height']}"
+                    )
 
         finally:
             if baseline_process:
                 kill_chromium_session(baseline_process, baseline_chrome_dir)
 
         # Verify baseline shows cookie consent
-        if not baseline_result['visible']:
+        if not baseline_result["visible"]:
             # If no cookie consent visible in baseline, we can't test the extension
             # This could happen if:
             # - The site changed and no longer shows cookie consent
             # - Cookie consent is region-specific
             # - Our selectors don't match this site
             print("\nWARNING: No cookie consent visible in baseline!")
-            print(f"HTML has cookie keywords: {baseline_result.get('has_cookie_keyword_in_html')}")
+            print(
+                f"HTML has cookie keywords: {baseline_result.get('has_cookie_keyword_in_html')}"
+            )
             print(f"HTML snippet: {baseline_result.get('html_snippet', '')[:200]}")
 
             pytest.fail(
-                f"Cannot test extension: no cookie consent visible in baseline on {TEST_URL}. "
+                f"Cannot test extension: no cookie consent visible in baseline on {test_url}. "
                 f"Elements found: {len(baseline_result['elements_found'])}. "
-                f"The site may have changed or cookie consent may be region-specific."
+                "The fixture HTML may need to be updated."
             )
 
-        print(f"\n✓ Baseline confirmed: Cookie consent IS visible (selector: {baseline_result['selector']})")
+        print(
+            f"\n✓ Baseline confirmed: Cookie consent IS visible (selector: {baseline_result['selector']})"
+        )
 
         # ============================================================
         # STEP 2: Install the extension
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 2: INSTALLING EXTENSION")
-        print("="*60)
+        print("=" * 60)
 
         env_with_ext = env_base.copy()
-        env_with_ext['CHROME_EXTENSIONS_DIR'] = str(ext_dir)
+        env_with_ext["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
 
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             cwd=str(tmpdir),
             capture_output=True,
             text=True,
             env=env_with_ext,
-            timeout=60
+            timeout=60,
         )
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
-        cache_file = ext_dir / 'istilldontcareaboutcookies.extension.json'
+        cache_file = ext_dir / "istilldontcareaboutcookies.extension.json"
         assert cache_file.exists(), "Extension cache not created"
         ext_data = json.loads(cache_file.read_text())
         print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
@@ -574,45 +591,51 @@ def test_hides_cookie_consent_on_filmin():
         # ============================================================
         # STEP 3: Run WITH extension, verify cookie consent is HIDDEN
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 3: TEST WITH EXTENSION")
-        print("="*60)
+        print("=" * 60)
 
         # Launch extension test Chromium in crawls directory
-        ext_crawl_id = 'test-with-ext'
-        ext_crawl_dir = Path(env_base['CRAWL_DIR']) / ext_crawl_id
+        ext_crawl_id = "test-with-ext"
+        ext_crawl_dir = Path(env_base["CRAWL_DIR"]) / ext_crawl_id
         ext_crawl_dir.mkdir(parents=True, exist_ok=True)
-        ext_chrome_dir = ext_crawl_dir / 'chrome'
-        env_with_ext['CRAWL_DIR'] = str(ext_crawl_dir)
+        ext_chrome_dir = ext_crawl_dir / "chrome"
+        env_with_ext["CRAWL_DIR"] = str(ext_crawl_dir)
         ext_process = None
 
         try:
             ext_process, ext_cdp_url = launch_chromium_session(
-                env_with_ext, ext_chrome_dir, ext_crawl_id
+                env_with_ext,
+                ext_chrome_dir,
+                ext_crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             )
             print(f"Extension Chromium launched: {ext_cdp_url}")
 
-            # Check that extension was loaded
-            extensions_file = ext_chrome_dir / 'extensions.json'
-            if extensions_file.exists():
-                loaded_exts = json.loads(extensions_file.read_text())
-                print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
+            loaded_exts = wait_for_extensions_metadata(
+                ext_chrome_dir, timeout_seconds=10
+            )
+            print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
 
             # Wait for extension to initialize
             time.sleep(3)
 
             ext_result = check_cookie_consent_visibility(
-                ext_cdp_url, TEST_URL, env_with_ext, tmpdir
+                ext_cdp_url, test_url, env_with_ext, tmpdir
             )
 
-            print(f"Extension result: visible={ext_result['visible']}, "
-                  f"elements_found={len(ext_result['elements_found'])}")
+            print(
+                f"Extension result: visible={ext_result['visible']}, "
+                f"elements_found={len(ext_result['elements_found'])}"
+            )
 
-            if ext_result['elements_found']:
+            if ext_result["elements_found"]:
                 print("Elements found with extension:")
-                for el in ext_result['elements_found'][:5]:
-                    print(f"  - {el['selector']}: visible={el['visible']}, "
-                          f"display={el['display']}, size={el['width']}x{el['height']}")
+                for el in ext_result["elements_found"][:5]:
+                    print(
+                        f"  - {el['selector']}: visible={el['visible']}, "
+                        f"display={el['display']}, size={el['width']}x{el['height']}"
+                    )
 
         finally:
             if ext_process:
@@ -621,21 +644,25 @@ def test_hides_cookie_consent_on_filmin():
         # ============================================================
         # STEP 4: Compare results
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 4: COMPARISON")
-        print("="*60)
-        print(f"Baseline (no extension): cookie consent visible = {baseline_result['visible']}")
+        print("=" * 60)
+        print(
+            f"Baseline (no extension): cookie consent visible = {baseline_result['visible']}"
+        )
         print(f"With extension: cookie consent visible = {ext_result['visible']}")
 
-        assert baseline_result['visible'], \
+        assert baseline_result["visible"], (
             "Baseline should show cookie consent (this shouldn't happen, we checked above)"
+        )
 
-        assert not ext_result['visible'], \
-            f"Cookie consent should be HIDDEN by extension.\n" \
-            f"Baseline showed consent at: {baseline_result['selector']}\n" \
-            f"But with extension, consent is still visible.\n" \
+        assert not ext_result["visible"], (
+            f"Cookie consent should be HIDDEN by extension.\n"
+            f"Baseline showed consent at: {baseline_result['selector']}\n"
+            f"But with extension, consent is still visible.\n"
             f"Elements still visible: {[e for e in ext_result['elements_found'] if e['visible']]}"
+        )
 
         print("\n✓ SUCCESS: Extension correctly hides cookie consent!")
         print(f"  - Baseline showed consent at: {baseline_result['selector']}")
-        print(f"  - Extension successfully hid it")
+        print("  - Extension successfully hid it")
diff --git a/abx_plugins/plugins/mercury/on_Crawl__40_mercury_install.py b/abx_plugins/plugins/mercury/on_Crawl__40_mercury_install.py
index 6571f03..5d3ebd5 100755
--- a/abx_plugins/plugins/mercury/on_Crawl__40_mercury_install.py
+++ b/abx_plugins/plugins/mercury/on_Crawl__40_mercury_install.py
@@ -16,52 +16,53 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'overrides': {
-            'npm': {
-                'packages': ['@postlight/parser'],
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "overrides": {
+            "npm": {
+                "packages": ["@postlight/parser"],
             }
         },
-        'machine_id': machine_id,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
 
 def main():
-    mercury_enabled = get_env_bool('MERCURY_ENABLED', True)
+    mercury_enabled = get_env_bool("MERCURY_ENABLED", True)
 
     if not mercury_enabled:
         sys.exit(0)
 
-    output_binary(name='postlight-parser', binproviders='npm,env')
+    output_binary(name="postlight-parser", binproviders="npm,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/mercury/on_Snapshot__57_mercury.py b/abx_plugins/plugins/mercury/on_Snapshot__57_mercury.py
index a85a275..d2d3b96 100755
--- a/abx_plugins/plugins/mercury/on_Snapshot__57_mercury.py
+++ b/abx_plugins/plugins/mercury/on_Snapshot__57_mercury.py
@@ -24,23 +24,25 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'mercury'
-BIN_NAME = 'postlight-parser'
-BIN_PROVIDERS = 'npm,env'
+PLUGIN_NAME = "mercury"
+BIN_NAME = "postlight-parser"
+BIN_PROVIDERS = "npm,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -54,7 +56,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -72,39 +74,47 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
 
     Returns: (success, output_path, error_message)
     """
-    timeout = get_env_int('MERCURY_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    mercury_args = get_env_array('MERCURY_ARGS', [])
-    mercury_args_extra = get_env_array('MERCURY_ARGS_EXTRA', [])
+    timeout = get_env_int("MERCURY_TIMEOUT") or get_env_int("TIMEOUT", 60)
+    mercury_args = get_env_array("MERCURY_ARGS", [])
+    mercury_args_extra = get_env_array("MERCURY_ARGS_EXTRA", [])
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
 
     try:
         # Get text version
-        cmd_text = [binary, *mercury_args, *mercury_args_extra, url, '--format=text']
-        result_text = subprocess.run(cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True)
+        cmd_text = [binary, *mercury_args, *mercury_args_extra, url, "--format=text"]
+        result_text = subprocess.run(
+            cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True
+        )
         if result_text.stdout:
             sys.stderr.write(result_text.stdout)
             sys.stderr.flush()
 
         if result_text.returncode != 0:
-            return False, None, f'postlight-parser failed (exit={result_text.returncode})'
+            return (
+                False,
+                None,
+                f"postlight-parser failed (exit={result_text.returncode})",
+            )
 
         try:
             text_json = json.loads(result_text.stdout)
         except json.JSONDecodeError:
-            return False, None, 'postlight-parser returned invalid JSON'
+            return False, None, "postlight-parser returned invalid JSON"
 
-        if text_json.get('failed'):
-            return False, None, 'Mercury was not able to extract article'
+        if text_json.get("failed"):
+            return False, None, "Mercury was not able to extract article"
 
         # Save text content
-        text_content = text_json.get('content', '')
-        (output_dir / 'content.txt').write_text(text_content, encoding='utf-8')
+        text_content = text_json.get("content", "")
+        (output_dir / "content.txt").write_text(text_content, encoding="utf-8")
 
         # Get HTML version
-        cmd_html = [binary, *mercury_args, *mercury_args_extra, url, '--format=html']
-        result_html = subprocess.run(cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True)
+        cmd_html = [binary, *mercury_args, *mercury_args_extra, url, "--format=html"]
+        result_html = subprocess.run(
+            cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True
+        )
         if result_html.stdout:
             sys.stderr.write(result_html.stdout)
             sys.stderr.flush()
@@ -115,26 +125,30 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
             html_json = {}
 
         # Save HTML content and metadata
-        html_content = html_json.pop('content', '')
+        html_content = html_json.pop("content", "")
         # Some sources return HTML-escaped markup inside the content blob.
         # If it looks heavily escaped, unescape once so it renders properly.
         if html_content:
-            escaped_count = html_content.count('&lt;') + html_content.count('&gt;')
-            tag_count = html_content.count('<')
+            escaped_count = html_content.count("&lt;") + html_content.count("&gt;")
+            tag_count = html_content.count("<")
             if escaped_count and escaped_count > tag_count * 2:
                 html_content = html.unescape(html_content)
-        (output_dir / 'content.html').write_text(html_content, encoding='utf-8')
+        (output_dir / "content.html").write_text(html_content, encoding="utf-8")
 
         # Save article metadata
-        metadata = {k: v for k, v in text_json.items() if k != 'content'}
-        (output_dir / 'article.json').write_text(json.dumps(metadata, indent=2), encoding='utf-8')
+        metadata = {k: v for k, v in text_json.items() if k != "content"}
+        (output_dir / "article.json").write_text(
+            json.dumps(metadata, indent=2), encoding="utf-8"
+        )
 
         # Link images/ to responses capture (if available)
         try:
-            hostname = urlparse(url).hostname or ''
+            hostname = urlparse(url).hostname or ""
             if hostname:
-                responses_images = (output_dir / '..' / 'responses' / 'image' / hostname / 'images').resolve()
-                link_path = output_dir / 'images'
+                responses_images = (
+                    output_dir / ".." / "responses" / "image" / hostname / "images"
+                ).resolve()
+                link_path = output_dir / "images"
                 if responses_images.exists() and responses_images.is_dir():
                     if link_path.exists() or link_path.is_symlink():
                         if link_path.is_symlink() or link_path.is_file():
@@ -143,34 +157,36 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
                             # Don't remove real directories
                             responses_images = None
                     if responses_images:
-                        rel_target = os.path.relpath(str(responses_images), str(output_dir))
+                        rel_target = os.path.relpath(
+                            str(responses_images), str(output_dir)
+                        )
                         link_path.symlink_to(rel_target)
         except Exception:
             pass
 
-        return True, 'content.html', ''
+        return True, "content.html", ""
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to extract article from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to extract article from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Extract article content using Postlight's Mercury Parser."""
 
     try:
         # Check if mercury extraction is enabled
-        if not get_env_bool('MERCURY_ENABLED', True):
-            print('Skipping mercury (MERCURY_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("MERCURY_ENABLED", True):
+            print("Skipping mercury (MERCURY_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('MERCURY_BINARY', 'postlight-parser')
+        binary = get_env("MERCURY_BINARY", "postlight-parser")
 
         # Run extraction
         success, output, error = extract_mercury(url, binary)
@@ -178,22 +194,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/mercury/tests/test_mercury.py b/abx_plugins/plugins/mercury/tests/test_mercury.py
index cc7490c..c95c5f9 100644
--- a/abx_plugins/plugins/mercury/tests/test_mercury.py
+++ b/abx_plugins/plugins/mercury/tests/test_mercury.py
@@ -5,29 +5,150 @@
 1. Hook script exists
 2. Dependencies installed via validation hooks
 3. Verify deps with abx-pkg
-4. Mercury extraction works on https://example.com
+4. Mercury extraction works on deterministic local fixture HTML
 5. JSONL output is correct
 6. Filesystem output contains extracted content
 7. Config options work
 """
 
 import json
+import os
 import subprocess
 import sys
 import tempfile
+import uuid
 from pathlib import Path
 import pytest
 
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_plugin_dir,
     get_hook_script,
-    PLUGINS_ROOT,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-MERCURY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_mercury.*')
-TEST_URL = 'https://example.com'
+PLUGINS_ROOT = PLUGIN_DIR.parent
+_MERCURY_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_mercury.*")
+if _MERCURY_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+MERCURY_HOOK = _MERCURY_HOOK
+TEST_URL = "https://example.com"
+
+# Module-level cache for binary path
+_mercury_binary_path = None
+_mercury_lib_root = None
+
+
+def require_mercury_binary() -> str:
+    """Return postlight-parser binary path or fail with actionable context."""
+    binary_path = get_mercury_binary_path()
+    assert binary_path, (
+        "postlight-parser installation failed. Install hook should install "
+        "the binary automatically in this test environment."
+    )
+    assert Path(binary_path).is_file(), (
+        f"postlight-parser binary path invalid: {binary_path}"
+    )
+    return binary_path
+
+
+def get_mercury_binary_path():
+    """Get postlight-parser path from cache or by running install hooks."""
+    global _mercury_binary_path
+    if _mercury_binary_path and Path(_mercury_binary_path).is_file():
+        return _mercury_binary_path
+
+    from abx_pkg import Binary, NpmProvider, EnvProvider
+
+    try:
+        binary = Binary(
+            name="postlight-parser",
+            binproviders=[NpmProvider(), EnvProvider()],
+            overrides={"npm": {"packages": ["@postlight/parser"]}},
+        ).load()
+        if binary and binary.abspath:
+            _mercury_binary_path = str(binary.abspath)
+            return _mercury_binary_path
+    except Exception:
+        pass
+
+    npm_hook = PLUGINS_ROOT / "npm" / "on_Binary__10_npm_install.py"
+    crawl_hook = PLUGIN_DIR / "on_Crawl__40_mercury_install.py"
+    if not npm_hook.exists():
+        return None
+
+    binary_id = str(uuid.uuid4())
+    machine_id = str(uuid.uuid4())
+    binproviders = "*"
+    overrides = None
+
+    if crawl_hook.exists():
+        crawl_result = subprocess.run(
+            [sys.executable, str(crawl_hook)],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        for line in crawl_result.stdout.strip().split("\n"):
+            if not line.strip().startswith("{"):
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if (
+                record.get("type") == "Binary"
+                and record.get("name") == "postlight-parser"
+            ):
+                binproviders = record.get("binproviders", "*")
+                overrides = record.get("overrides")
+                break
+
+    global _mercury_lib_root
+    if not _mercury_lib_root:
+        _mercury_lib_root = tempfile.mkdtemp(prefix="mercury-lib-")
+
+    env = os.environ.copy()
+    env["HOME"] = str(_mercury_lib_root)
+    env["SNAP_DIR"] = str(Path(_mercury_lib_root) / "data")
+    env["CRAWL_DIR"] = str(Path(_mercury_lib_root) / "crawl")
+    env.pop("LIB_DIR", None)
+
+    cmd = [
+        sys.executable,
+        str(npm_hook),
+        "--binary-id",
+        binary_id,
+        "--machine-id",
+        machine_id,
+        "--name",
+        "postlight-parser",
+        f"--binproviders={binproviders}",
+    ]
+    if overrides:
+        cmd.append(f"--overrides={json.dumps(overrides)}")
+
+    install_result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=300,
+        env=env,
+    )
+
+    for line in install_result.stdout.strip().split("\n"):
+        if not line.strip().startswith("{"):
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if record.get("type") == "Binary" and record.get("name") == "postlight-parser":
+            _mercury_binary_path = record.get("abspath")
+            return _mercury_binary_path
+
+    return None
+
 
 def test_hook_script_exists():
     """Verify on_Snapshot hook exists."""
@@ -35,78 +156,161 @@ def test_hook_script_exists():
 
 
 def test_verify_deps_with_abx_pkg():
-    """Verify postlight-parser is available via abx-pkg."""
-    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
-
-    # Verify postlight-parser is available
-    mercury_binary = Binary(
-        name='postlight-parser',
-        binproviders=[NpmProvider(), EnvProvider()],
-        overrides={'npm': {'packages': ['@postlight/parser']}}
+    """Verify postlight-parser is installed by real plugin install hooks."""
+    binary_path = require_mercury_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
     )
-    mercury_loaded = mercury_binary.load()
 
-    # If validate hook found it (exit 0), this should succeed
-    # If validate hook didn't find it (exit 1), this may fail unless binprovider installed it
-    if mercury_loaded and mercury_loaded.abspath:
-        assert True, "postlight-parser is available"
-    else:
-        pass
 
-def test_extracts_with_mercury_parser():
-    """Test full workflow: extract with postlight-parser from real HTML via hook."""
-    # Prerequisites checked by earlier test
+def test_extracts_with_mercury_parser(httpserver):
+    """Test full workflow: extract with postlight-parser from local fixture HTML."""
+    binary_path = require_mercury_binary()
+    test_url = httpserver.url_for("/mercury-article")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
         snap_dir = tmpdir
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
-
-        # Create HTML source that mercury can parse
-        (snap_dir / 'singlefile').mkdir()
-        (snap_dir / 'singlefile' / 'singlefile.html').write_text(
-            '<html><head><title>Test Article</title></head><body>'
-            '<article><h1>Example Article</h1><p>This is test content for mercury parser.</p></article>'
-            '</body></html>'
+        env["SNAP_DIR"] = str(snap_dir)
+        env["MERCURY_BINARY"] = binary_path
+
+        # Serve deterministic HTML source that mercury can parse.
+        httpserver.expect_request("/mercury-article").respond_with_data(
+            "<html><head><title>Test Article</title></head><body>"
+            "<article><h1>Example Article</h1><p>This is test content for mercury parser.</p></article>"
+            "</body></html>",
+            content_type="text/html; charset=utf-8",
         )
 
         # Run mercury extraction hook
         result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(MERCURY_HOOK),
+                "--url",
+                test_url,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=60,
-            env=env
+            env=env,
         )
 
         assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify filesystem output (hook writes to current directory)
-        output_file = snap_dir / 'mercury' / 'content.html'
+        output_file = snap_dir / "mercury" / "content.html"
         assert output_file.exists(), "content.html not created"
 
         content = output_file.read_text()
         assert len(content) > 0, "Output should not be empty"
 
+
+def test_extracts_with_local_html_source_present(httpserver):
+    """Test real mercury extraction when local singlefile source is present."""
+    binary_path = require_mercury_binary()
+    test_url = httpserver.url_for("/mercury-with-local-source")
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        httpserver.expect_request("/mercury-with-local-source").respond_with_data(
+            "<html><head><title>Remote Source</title></head><body>"
+            "<article><h1>Remote Source Marker</h1><p>Fetched URL content for mercury parser.</p></article>"
+            "</body></html>",
+            content_type="text/html; charset=utf-8",
+        )
+
+        # Create local singlefile source to cover the 'local source exists' path.
+        singlefile_dir = tmpdir / "singlefile"
+        singlefile_dir.mkdir(parents=True, exist_ok=True)
+        (singlefile_dir / "singlefile.html").write_text(
+            "<html><head><title>Local Source</title></head><body>"
+            "<article><h1>Local Source Marker</h1><p>Local singlefile fixture content.</p></article>"
+            "</body></html>",
+            encoding="utf-8",
+        )
+
+        env = os.environ.copy()
+        env["SNAP_DIR"] = str(tmpdir)
+        env["MERCURY_BINARY"] = binary_path
+
+        result = subprocess.run(
+            [
+                sys.executable,
+                str(MERCURY_HOOK),
+                "--url",
+                test_url,
+                "--snapshot-id",
+                "test-local-source",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=60,
+            env=env,
+        )
+
+        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
+
+        result_json = None
+        for line in result.stdout.strip().split("\n"):
+            line = line.strip()
+            if line.startswith("{"):
+                try:
+                    record = json.loads(line)
+                    if record.get("type") == "ArchiveResult":
+                        result_json = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        assert result_json, "Should have ArchiveResult JSONL output"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
+
+        output_file = tmpdir / "mercury" / "content.html"
+        assert output_file.exists(), "content.html not created"
+
+        extracted_html = output_file.read_text(errors="ignore")
+        extracted_lower = extracted_html.lower()
+        assert len(extracted_html) > 50, "Extracted HTML should not be trivially short"
+        assert "<" in extracted_lower and ">" in extracted_lower, (
+            f"Extracted HTML does not look like HTML. Output: {extracted_html[:500]}"
+        )
+
+        content_txt = tmpdir / "mercury" / "content.txt"
+        assert content_txt.exists(), "content.txt not created"
+        extracted_text = content_txt.read_text(errors="ignore").strip()
+        assert len(extracted_text) > 10, "Extracted text should not be empty"
+
+        article_json = tmpdir / "mercury" / "article.json"
+        assert article_json.exists(), "article.json not created"
+        metadata = json.loads(article_json.read_text())
+        assert metadata.get("title"), (
+            f"Expected non-empty title in metadata: {metadata}"
+        )
+
+
 def test_config_save_mercury_false_skips():
     """Test that MERCURY_ENABLED=False exits without emitting JSONL."""
     import os
@@ -114,56 +318,110 @@ def test_config_save_mercury_false_skips():
     with tempfile.TemporaryDirectory() as tmpdir:
         snap_dir = Path(tmpdir)
         env = os.environ.copy()
-        env['MERCURY_ENABLED'] = 'False'
-        env['SNAP_DIR'] = str(snap_dir)
+        env["MERCURY_ENABLED"] = "False"
+        env["SNAP_DIR"] = str(snap_dir)
 
         result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(MERCURY_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
+
 
+def test_extracts_without_local_html_source(httpserver):
+    """Test real mercury extraction from fetched HTML when no local source file exists."""
+    binary_path = require_mercury_binary()
+    test_url = httpserver.url_for("/mercury-no-html-source")
 
-def test_fails_gracefully_without_html():
-    """Test that mercury works even without HTML source (fetches URL directly)."""
     with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        httpserver.expect_request("/mercury-no-html-source").respond_with_data(
+            "<html><head><title>No Local HTML Source</title></head><body>"
+            "<article><h1>Remote Article</h1><p>Fetched directly by mercury parser.</p></article>"
+            "</body></html>",
+            content_type="text/html; charset=utf-8",
+        )
+
+        # Ensure this path tests remote fetch extraction (no local singlefile source exists).
+        assert not (tmpdir / "singlefile" / "singlefile.html").exists()
+
+        env = os.environ.copy()
+        env["MERCURY_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
         result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(MERCURY_HOOK),
+                "--url",
+                test_url,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=30
+            env=env,
+            timeout=60,
         )
 
-        # Mercury fetches URL directly with postlight-parser, doesn't need HTML source
+        assert result.returncode == 0, f"Mercury fetch/parse failed: {result.stderr}"
+
+        # Mercury fetches URL directly with postlight-parser, doesn't need local HTML source
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
-        # Mercury should succeed or fail based on network, not based on HTML source
         assert result_json, "Should emit ArchiveResult"
-        assert result_json['status'] in ['succeeded', 'failed'], f"Should succeed or fail: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
+
+        output_file = tmpdir / "mercury" / "content.html"
+        assert output_file.exists(), "content.html not created"
+
+        extracted_html = output_file.read_text(errors="ignore")
+        extracted_lower = extracted_html.lower()
+        assert len(extracted_html) > 50, "Extracted HTML should not be trivially short"
+        assert (
+            "remote article" in extracted_lower or "fetched directly" in extracted_lower
+        ), f"Expected extracted article content missing. Output: {extracted_html[:500]}"
+
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/modalcloser/tests/test_modalcloser.py b/abx_plugins/plugins/modalcloser/tests/test_modalcloser.py
index 9f6ad20..f9fbedf 100644
--- a/abx_plugins/plugins/modalcloser/tests/test_modalcloser.py
+++ b/abx_plugins/plugins/modalcloser/tests/test_modalcloser.py
@@ -13,7 +13,6 @@
 """
 
 import json
-import os
 import signal
 import subprocess
 import time
@@ -22,6 +21,8 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 # Import shared Chrome test helpers
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
@@ -30,9 +31,32 @@
 
 
 PLUGIN_DIR = Path(__file__).parent.parent
-MODALCLOSER_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_modalcloser.*'), None)
-TEST_URL = 'https://www.singsing.movie/'
-COOKIE_CONSENT_TEST_URL = 'https://www.filmin.es/'
+MODALCLOSER_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_modalcloser.*"), None)
+TEST_URL = "https://www.singsing.movie/"
+COOKIE_CONSENT_TEST_URL = "https://www.filmin.es/"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+
+
+def _modal_page_url(httpserver) -> str:
+    """Serve a deterministic page with visible modal/cookie elements."""
+    html = """<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Modal Fixture</title>
+</head>
+<body class="modal-open" style="overflow: hidden;">
+  <main><h1>Modal Fixture</h1></main>
+  <div id="cookie-consent" class="cookie-banner" style="display:block; visibility:visible; position:fixed; inset:0; background: rgba(0,0,0,0.8); z-index:9999;">
+    Cookie banner
+  </div>
+</body>
+</html>
+"""
+    httpserver.expect_request("/modal").respond_with_data(
+        html, content_type="text/html; charset=utf-8"
+    )
+    return httpserver.url_for("/modal")
 
 
 def test_hook_script_exists():
@@ -45,160 +69,208 @@ def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg after hook installation."""
     from abx_pkg import Binary, EnvProvider
 
-    EnvProvider.model_rebuild()
-
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for modalcloser plugin"
+    assert node_loaded and node_loaded.abspath, (
+        "Node.js required for modalcloser plugin"
+    )
 
 
 def test_config_modalcloser_disabled_skips():
     """Test that MODALCLOSER_ENABLED=False exits without emitting JSONL."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['MODALCLOSER_ENABLED'] = 'False'
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["MODALCLOSER_ENABLED"] = "False"
 
         result = subprocess.run(
-            ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
+            [
+                "node",
+                str(MODALCLOSER_HOOK),
+                f"--url={TEST_URL}",
+                "--snapshot-id=test-disabled",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
+        )
 
 
 def test_fails_gracefully_without_chrome_session():
     """Test that hook fails gracefully when no chrome session exists."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
-        modalcloser_dir = snap_dir / 'modalcloser'
+        snap_dir = tmpdir / "snap"
+        modalcloser_dir = snap_dir / "modalcloser"
         modalcloser_dir.mkdir(parents=True, exist_ok=True)
 
         result = subprocess.run(
-            ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
+            [
+                "node",
+                str(MODALCLOSER_HOOK),
+                f"--url={TEST_URL}",
+                "--snapshot-id=test-no-chrome",
+            ],
             cwd=modalcloser_dir,
             capture_output=True,
             text=True,
-            env=get_test_env() | {'SNAP_DIR': str(snap_dir)},
-            timeout=30
+            env=get_test_env() | {"SNAP_DIR": str(snap_dir)},
+            timeout=30,
         )
 
         # Should fail (exit 1) when no chrome session
         assert result.returncode != 0, "Should fail when no chrome session exists"
         # Error could be about chrome/CDP not found, or puppeteer module missing
         err_lower = result.stderr.lower()
-        assert any(x in err_lower for x in ['chrome', 'cdp', 'puppeteer', 'module']), \
+        assert any(x in err_lower for x in ["chrome", "cdp", "puppeteer", "module"]), (
             f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
+        )
 
 
-def test_background_script_handles_sigterm():
+def test_background_script_handles_sigterm(httpserver):
     """Test that background script runs and handles SIGTERM correctly."""
     with tempfile.TemporaryDirectory() as tmpdir:
         modalcloser_process = None
         try:
+            test_url = _modal_page_url(httpserver)
             with chrome_session(
                 Path(tmpdir),
-                crawl_id='test-modalcloser',
-                snapshot_id='snap-modalcloser',
-                test_url=TEST_URL,
+                crawl_id="test-modalcloser",
+                snapshot_id="snap-modalcloser",
+                test_url=test_url,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
                 # Create modalcloser output directory (sibling to chrome)
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
+                modalcloser_dir = snapshot_chrome_dir.parent / "modalcloser"
                 modalcloser_dir.mkdir()
 
                 # Run modalcloser as background process (use env from setup_chrome_session)
-                env['MODALCLOSER_POLL_INTERVAL'] = '200'  # Faster polling for test
+                env["MODALCLOSER_POLL_INTERVAL"] = "200"  # Faster polling for test
 
                 modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-modalcloser'],
+                    [
+                        "node",
+                        str(MODALCLOSER_HOOK),
+                        f"--url={test_url}",
+                        "--snapshot-id=snap-modalcloser",
+                    ],
                     cwd=str(modalcloser_dir),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
-                    env=env
+                    env=env,
                 )
 
                 # Let it run for a bit
                 time.sleep(2)
 
                 # Verify it's still running (background script)
-                assert modalcloser_process.poll() is None, "Modalcloser should still be running as background process"
+                assert modalcloser_process.poll() is None, (
+                    "Modalcloser should still be running as background process"
+                )
 
                 # Send SIGTERM
                 modalcloser_process.send_signal(signal.SIGTERM)
                 stdout, stderr = modalcloser_process.communicate(timeout=5)
 
-                assert modalcloser_process.returncode == 0, f"Should exit 0 on SIGTERM: {stderr}"
+                assert modalcloser_process.returncode == 0, (
+                    f"Should exit 0 on SIGTERM: {stderr}"
+                )
 
                 # Parse JSONL output
                 result_json = None
-                for line in stdout.strip().split('\n'):
+                for line in stdout.strip().split("\n"):
                     line = line.strip()
-                    if line.startswith('{'):
+                    if line.startswith("{"):
                         try:
                             record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
+                            if record.get("type") == "ArchiveResult":
                                 result_json = record
                                 break
                         except json.JSONDecodeError:
                             pass
 
-                assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {stdout}"
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+                assert result_json is not None, (
+                    f"Should have ArchiveResult JSONL output. Stdout: {stdout}"
+                )
+                assert result_json["status"] == "succeeded", (
+                    f"Should succeed: {result_json}"
+                )
 
                 # Verify output_str format
-                output_str = result_json.get('output_str', '')
-                assert 'modal' in output_str.lower() or 'dialog' in output_str.lower(), \
-                    f"output_str should mention modals/dialogs: {output_str}"
+                output_str = result_json.get("output_str", "")
+                assert "closed" in output_str.lower(), (
+                    f"output_str should report closed modal/dialog counts: {output_str}"
+                )
+                assert "no modals detected" not in output_str.lower(), (
+                    f"Should close at least one modal/dialog: {output_str}"
+                )
 
                 # Verify no files created in output directory
                 output_files = list(modalcloser_dir.iterdir())
-                assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
+                assert len(output_files) == 0, (
+                    f"Should not create any files, but found: {output_files}"
+                )
 
         finally:
             if modalcloser_process and modalcloser_process.poll() is None:
                 modalcloser_process.kill()
 
 
-def test_dialog_handler_logs_dialogs():
+def test_dialog_handler_logs_dialogs(httpserver):
     """Test that dialog handler is set up correctly."""
     with tempfile.TemporaryDirectory() as tmpdir:
         modalcloser_process = None
         try:
+            test_url = _modal_page_url(httpserver)
             with chrome_session(
-                    Path(tmpdir),
-                    crawl_id='test-dialog',
-                    snapshot_id='snap-dialog',
-                    test_url=TEST_URL,
+                Path(tmpdir),
+                crawl_id="test-dialog",
+                snapshot_id="snap-dialog",
+                test_url=test_url,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
+                modalcloser_dir = snapshot_chrome_dir.parent / "modalcloser"
                 modalcloser_dir.mkdir()
 
                 # Use env from setup_chrome_session
-                env['MODALCLOSER_TIMEOUT'] = '100'  # Fast timeout for test
-                env['MODALCLOSER_POLL_INTERVAL'] = '200'
+                env["MODALCLOSER_TIMEOUT"] = "100"  # Fast timeout for test
+                env["MODALCLOSER_POLL_INTERVAL"] = "200"
 
                 modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-dialog'],
+                    [
+                        "node",
+                        str(MODALCLOSER_HOOK),
+                        f"--url={test_url}",
+                        "--snapshot-id=snap-dialog",
+                    ],
                     cwd=str(modalcloser_dir),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
-                    env=env
+                    env=env,
                 )
 
                 # Let it run briefly
@@ -213,42 +285,51 @@ def test_dialog_handler_logs_dialogs():
                 modalcloser_process.send_signal(signal.SIGTERM)
                 stdout, stderr = modalcloser_process.communicate(timeout=5)
 
-                assert 'listening' in stderr.lower() or 'modalcloser' in stderr.lower(), \
-                    f"Should log startup message: {stderr}"
-                assert modalcloser_process.returncode == 0, f"Should exit cleanly: {stderr}"
+                assert (
+                    "listening" in stderr.lower() or "modalcloser" in stderr.lower()
+                ), f"Should log startup message: {stderr}"
+                assert modalcloser_process.returncode == 0, (
+                    f"Should exit cleanly: {stderr}"
+                )
 
         finally:
             if modalcloser_process and modalcloser_process.poll() is None:
                 modalcloser_process.kill()
 
 
-def test_config_poll_interval():
+def test_config_poll_interval(httpserver):
     """Test that MODALCLOSER_POLL_INTERVAL config is respected."""
     with tempfile.TemporaryDirectory() as tmpdir:
         chrome_launch_process = None
         chrome_pid = None
         modalcloser_process = None
         try:
+            test_url = _modal_page_url(httpserver)
             with chrome_session(
-                    Path(tmpdir),
-                    crawl_id='test-poll',
-                    snapshot_id='snap-poll',
-                    test_url=TEST_URL,
+                Path(tmpdir),
+                crawl_id="test-poll",
+                snapshot_id="snap-poll",
+                test_url=test_url,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
+                modalcloser_dir = snapshot_chrome_dir.parent / "modalcloser"
                 modalcloser_dir.mkdir()
 
                 # Set very short poll interval (use env from setup_chrome_session)
-                env['MODALCLOSER_POLL_INTERVAL'] = '100'  # 100ms
+                env["MODALCLOSER_POLL_INTERVAL"] = "100"  # 100ms
 
                 modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-poll'],
+                    [
+                        "node",
+                        str(MODALCLOSER_HOOK),
+                        f"--url={test_url}",
+                        "--snapshot-id=snap-poll",
+                    ],
                     cwd=str(modalcloser_dir),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
-                    env=env
+                    env=env,
                 )
 
                 # Run for short time
@@ -265,18 +346,24 @@ def test_config_poll_interval():
 
                 # Verify JSONL output exists
                 result_json = None
-                for line in stdout.strip().split('\n'):
-                    if line.strip().startswith('{'):
+                for line in stdout.strip().split("\n"):
+                    if line.strip().startswith("{"):
                         try:
                             record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
+                            if record.get("type") == "ArchiveResult":
                                 result_json = record
                                 break
                         except json.JSONDecodeError:
                             pass
 
                 assert result_json is not None, "Should have JSONL output"
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+                assert result_json["status"] == "succeeded", (
+                    f"Should succeed: {result_json}"
+                )
+                output_str = result_json.get("output_str", "").lower()
+                assert (
+                    "closed" in output_str and "no modals detected" not in output_str
+                ), f"Should report closing modals/dialogs: {result_json}"
 
         finally:
             if modalcloser_process and modalcloser_process.poll() is None:
@@ -286,7 +373,7 @@ def test_config_poll_interval():
 def test_hides_cookie_consent_on_filmin():
     """Live test: verify modalcloser hides cookie consent popup on filmin.es."""
     # Create a test script that uses puppeteer directly
-    test_script = '''
+    test_script = """
 const puppeteer = require('puppeteer-core');
 
 async function closeModals(page) {
@@ -412,24 +499,24 @@ def test_hides_cookie_consent_on_filmin():
     console.error('Error:', e.message);
     process.exit(1);
 });
-'''
+"""
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        script_path = tmpdir / 'test_cookie_consent.js'
+        script_path = tmpdir / "test_cookie_consent.js"
         script_path.write_text(test_script)
 
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
 
         result = subprocess.run(
-            ['node', str(script_path)],
+            ["node", str(script_path)],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         print(f"stderr: {result.stderr}")
@@ -438,22 +525,28 @@ def test_hides_cookie_consent_on_filmin():
         assert result.returncode == 0, f"Test script failed: {result.stderr}"
 
         # Parse the JSON output
-        output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-        assert len(output_lines) > 0, f"No JSON output from test script. stdout: {result.stdout}"
+        output_lines = [
+            line for line in result.stdout.strip().split("\n") if line.startswith("{")
+        ]
+        assert len(output_lines) > 0, (
+            f"No JSON output from test script. stdout: {result.stdout}"
+        )
 
         test_result = json.loads(output_lines[-1])
 
         # The cookie consent should have been found initially (or page changed)
         # After running closeModals, it should be hidden
-        if test_result['before_found']:
-            assert test_result['after_hidden'], \
+        if test_result["before_found"]:
+            assert test_result["after_hidden"], (
                 f"Cookie consent should be hidden after modalcloser. Result: {test_result}"
-            assert test_result['modals_closed'] > 0, \
+            )
+            assert test_result["modals_closed"] > 0, (
                 f"Should have closed at least one modal. Result: {test_result}"
+            )
         else:
             # Page may have changed, just verify no errors
             print("Cookie consent element not found (page may have changed)")
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/npm/on_Binary__10_npm_install.py b/abx_plugins/plugins/npm/on_Binary__10_npm_install.py
index 7c10541..7d4aeec 100755
--- a/abx_plugins/plugins/npm/on_Binary__10_npm_install.py
+++ b/abx_plugins/plugins/npm/on_Binary__10_npm_install.py
@@ -18,33 +18,37 @@
 from pathlib import Path
 
 import rich_click as click
-from abx_pkg import Binary, NpmProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-NpmProvider.model_rebuild()
+from abx_pkg import Binary, NpmProvider
 
 
 @click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', default=None, help="Custom install command")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str | None, overrides: str | None):
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--binary-id", required=True, help="Dependency UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--custom-cmd", default=None, help="Custom install command")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict")
+def main(
+    binary_id: str,
+    machine_id: str,
+    name: str,
+    binproviders: str,
+    custom_cmd: str | None,
+    overrides: str | None,
+):
     """Install binary using npm."""
 
-    if binproviders != '*' and 'npm' not in binproviders.split(','):
+    if binproviders != "*" and "npm" not in binproviders.split(","):
         click.echo(f"npm provider not allowed for {name}", err=True)
         sys.exit(0)
 
     # Get LIB_DIR from environment (optional)
-    lib_dir = os.environ.get('LIB_DIR', '').strip()
+    lib_dir = os.environ.get("LIB_DIR", "").strip()
     if not lib_dir:
-        lib_dir = str(Path.home() / '.config' / 'abx' / 'lib')
+        lib_dir = str(Path.home() / ".config" / "abx" / "lib")
 
     # Structure: lib/arm64-darwin/npm (npm will create node_modules inside this)
-    npm_prefix = Path(lib_dir) / 'npm'
+    npm_prefix = Path(lib_dir) / "npm"
     npm_prefix.mkdir(parents=True, exist_ok=True)
 
     # Use abx-pkg NpmProvider to install binary with custom prefix
@@ -61,11 +65,17 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         if overrides:
             try:
                 overrides_dict = json.loads(overrides)
-                click.echo(f"Using custom install overrides: {overrides_dict}", err=True)
+                click.echo(
+                    f"Using custom install overrides: {overrides_dict}", err=True
+                )
             except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
+                click.echo(
+                    f"Warning: Failed to parse overrides JSON: {overrides}", err=True
+                )
 
-        binary = Binary(name=name, binproviders=[provider], overrides=overrides_dict or {}).install()
+        binary = Binary(
+            name=name, binproviders=[provider], overrides=overrides_dict or {}
+        ).install()
     except Exception as e:
         click.echo(f"npm install failed: {e}", err=True)
         sys.exit(1)
@@ -74,28 +84,28 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
         click.echo(f"{name} not found after npm install", err=True)
         sys.exit(1)
 
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = machine_id.strip() or os.environ.get("MACHINE_ID", "").strip()
 
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'npm',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": "npm",
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
     # Emit PATH update for npm bin dirs (node_modules/.bin preferred)
     npm_bin_dirs = [
-        str(npm_prefix / 'node_modules' / '.bin'),
-        str(npm_prefix / 'bin'),
+        str(npm_prefix / "node_modules" / ".bin"),
+        str(npm_prefix / "bin"),
     ]
-    current_path = os.environ.get('PATH', '')
-    path_dirs = current_path.split(':') if current_path else []
+    current_path = os.environ.get("PATH", "")
+    path_dirs = current_path.split(":") if current_path else []
     new_path = current_path
 
     for npm_bin_dir in npm_bin_dirs:
@@ -103,21 +113,29 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
             new_path = f"{npm_bin_dir}:{new_path}" if new_path else npm_bin_dir
             path_dirs.insert(0, npm_bin_dir)
 
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'PATH': new_path,
-        },
-    }))
+    print(
+        json.dumps(
+            {
+                "type": "Machine",
+                "config": {
+                    "PATH": new_path,
+                },
+            }
+        )
+    )
 
     # Also emit NODE_MODULES_DIR for JS module resolution
-    node_modules_dir = str(npm_prefix / 'node_modules')
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'NODE_MODULES_DIR': node_modules_dir,
-        },
-    }))
+    node_modules_dir = str(npm_prefix / "node_modules")
+    print(
+        json.dumps(
+            {
+                "type": "Machine",
+                "config": {
+                    "NODE_MODULES_DIR": node_modules_dir,
+                },
+            }
+        )
+    )
 
     # Log human-readable info to stderr
     click.echo(f"Installed {name} at {binary.abspath}", err=True)
@@ -126,5 +144,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/npm/on_Crawl__00_npm_install.py b/abx_plugins/plugins/npm/on_Crawl__00_npm_install.py
index 48818e1..5423a02 100755
--- a/abx_plugins/plugins/npm/on_Crawl__00_npm_install.py
+++ b/abx_plugins/plugins/npm/on_Crawl__00_npm_install.py
@@ -14,49 +14,52 @@
 import os
 import sys
 from pathlib import Path
+from typing import Any
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
-def output_binary(name: str, binproviders: str, overrides: dict | None = None) -> None:
-    machine_id = os.environ.get('MACHINE_ID', '')
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+def output_binary(
+    name: str, binproviders: str, overrides: dict[str, Any] | None = None
+) -> None:
+    machine_id = os.environ.get("MACHINE_ID", "")
+    record: dict[str, Any] = {
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     if overrides:
-        record['overrides'] = overrides
+        record["overrides"] = overrides
     print(json.dumps(record))
 
 
 def main() -> None:
     output_binary(
-        name='node',
-        binproviders='apt,brew,env',
-        overrides={'apt': {'packages': ['nodejs']}},
+        name="node",
+        binproviders="apt,brew,env",
+        overrides={"apt": {"packages": ["nodejs"]}},
     )
 
     output_binary(
-        name='npm',
-        binproviders='apt,brew,env',
+        name="npm",
+        binproviders="apt,brew,env",
         overrides={
-            'apt': {'packages': ['nodejs', 'npm']},
-            'brew': {'packages': ['node']},
+            "apt": {"packages": ["nodejs", "npm"]},
+            "brew": {"packages": ["node"]},
         },
     )
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/npm/tests/test_npm_provider.py b/abx_plugins/plugins/npm/tests/test_npm_provider.py
index d357276..4dc6226 100644
--- a/abx_plugins/plugins/npm/tests/test_npm_provider.py
+++ b/abx_plugins/plugins/npm/tests/test_npm_provider.py
@@ -21,12 +21,12 @@
 
 # Get the path to the npm provider hook
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_npm_install.py'), None)
+INSTALL_HOOK = next(PLUGIN_DIR.glob("on_Binary__*_npm_install.py"), None)
 
 
 def npm_available() -> bool:
     """Check if npm is installed."""
-    return shutil.which('npm') is not None
+    return shutil.which("npm") is not None
 
 
 class TestNpmProviderHook:
@@ -47,99 +47,103 @@ def test_hook_script_exists(self):
     def test_hook_uses_default_lib_dir(self):
         """Hook should fall back to default LIB_DIR when not set."""
         env = os.environ.copy()
-        env.pop('LIB_DIR', None)
-        env['HOME'] = self.temp_dir
+        env.pop("LIB_DIR", None)
+        env["HOME"] = self.temp_dir
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=some-package',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=some-package",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert 'LIB_DIR environment variable not set' not in result.stderr
-        default_prefix = Path(self.temp_dir) / '.config' / 'abx' / 'lib' / 'npm'
+        assert "LIB_DIR environment variable not set" not in result.stderr
+        default_prefix = Path(self.temp_dir) / ".config" / "abx" / "lib" / "npm"
         assert default_prefix.exists()
 
     def test_hook_skips_when_npm_not_allowed(self):
         """Hook should skip when npm not in allowed binproviders."""
         env = os.environ.copy()
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=some-package',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # npm not allowed
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=some-package",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                "--binproviders=pip,apt",  # npm not allowed
             ],
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should exit cleanly (code 0) when npm not allowed
-        assert 'npm provider not allowed' in result.stderr
+        assert "npm provider not allowed" in result.stderr
         assert result.returncode == 0
 
     def test_hook_creates_npm_prefix(self):
         """Hook should create npm prefix directory."""
         env = os.environ.copy()
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
         # Even if installation fails, the npm prefix should be created
         subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent-xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=nonexistent-xyz123",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
-        npm_prefix = Path(self.temp_dir) / '.config' / 'abx' / 'lib' / 'npm'
+        npm_prefix = Path(self.temp_dir) / ".config" / "abx" / "lib" / "npm"
         assert npm_prefix.exists()
 
     def test_hook_handles_overrides(self):
         """Hook should accept overrides JSON."""
         env = os.environ.copy()
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
-        overrides = json.dumps({'npm': {'packages': ['custom-pkg']}})
+        overrides = json.dumps({"npm": {"packages": ["custom-pkg"]}})
 
         # Just verify it doesn't crash with overrides
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=test-pkg',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                f'--overrides={overrides}',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=test-pkg",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
+                f"--overrides={overrides}",
             ],
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # May fail to install, but should not crash parsing overrides
-        assert 'Failed to parse overrides JSON' not in result.stderr
+        assert "Failed to parse overrides JSON" not in result.stderr
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/papersdl/on_Crawl__30_papersdl_install.py b/abx_plugins/plugins/papersdl/on_Crawl__30_papersdl_install.py
index 4b6a68b..f0ef39b 100755
--- a/abx_plugins/plugins/papersdl/on_Crawl__30_papersdl_install.py
+++ b/abx_plugins/plugins/papersdl/on_Crawl__30_papersdl_install.py
@@ -15,47 +15,48 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
 
 def main():
-    papersdl_enabled = get_env_bool('PAPERSDL_ENABLED', True)
+    papersdl_enabled = get_env_bool("PAPERSDL_ENABLED", True)
 
     if not papersdl_enabled:
         sys.exit(0)
 
-    output_binary(name='papers-dl', binproviders='pip,env')
+    output_binary(name="papers-dl", binproviders="pip,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/papersdl/on_Snapshot__66_papersdl.bg.py b/abx_plugins/plugins/papersdl/on_Snapshot__66_papersdl.bg.py
index 20eef9c..93c2f15 100755
--- a/abx_plugins/plugins/papersdl/on_Snapshot__66_papersdl.bg.py
+++ b/abx_plugins/plugins/papersdl/on_Snapshot__66_papersdl.bg.py
@@ -36,23 +36,25 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'papersdl'
-BIN_NAME = 'papers-dl'
-BIN_PROVIDERS = 'pip,env'
+PLUGIN_NAME = "papersdl"
+BIN_NAME = "papers-dl"
+BIN_PROVIDERS = "pip,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -66,7 +68,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -81,13 +83,21 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
 def extract_doi_from_url(url: str) -> str | None:
     """Extract DOI from common paper URLs."""
     # Match DOI pattern in URL
-    doi_pattern = r'10\.\d{4,}/[^\s]+'
+    doi_pattern = r"10\.\d{4,}/[^\s]+"
     match = re.search(doi_pattern, url)
     if match:
         return match.group(0)
     return None
 
 
+def extract_arxiv_id_from_doi(doi: str) -> str | None:
+    """Extract arXiv identifier from arXiv DOI format."""
+    match = re.search(r"10\.48550/arXiv\.(\d{4}\.\d{4,5}(?:v\d+)?)", doi, re.IGNORECASE)
+    if not match:
+        return None
+    return match.group(1)
+
+
 def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]:
     """
     Download paper using papers-dl.
@@ -95,9 +105,9 @@ def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]:
     Returns: (success, output_path, error_message)
     """
     # Get config from env
-    timeout = get_env_int('TIMEOUT', 300)
-    papersdl_args = get_env_array('PAPERSDL_ARGS', [])
-    papersdl_args_extra = get_env_array('PAPERSDL_ARGS_EXTRA', [])
+    timeout = get_env_int("PAPERSDL_TIMEOUT", get_env_int("TIMEOUT", 300))
+    papersdl_args = get_env_array("PAPERSDL_ARGS", ["fetch"])
+    papersdl_args_extra = get_env_array("PAPERSDL_ARGS_EXTRA", [])
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
@@ -108,16 +118,18 @@ def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]:
         # If no DOI found, papers-dl might handle the URL directly
         identifier = url
     else:
-        identifier = doi
+        # papers-dl's arxiv provider resolves arXiv IDs more reliably than DOI backends.
+        arxiv_id = extract_arxiv_id_from_doi(doi)
+        identifier = f"arXiv:{arxiv_id}" if arxiv_id else doi
 
     # Build command - papers-dl <args> <identifier> -o <output_dir>
-    cmd = [binary, *papersdl_args, identifier, '-o', str(output_dir)]
+    cmd = [binary, *papersdl_args, identifier, "-o", str(output_dir)]
 
     if papersdl_args_extra:
         cmd.extend(papersdl_args_extra)
 
     try:
-        print(f'[papersdl] Starting download (timeout={timeout}s)', file=sys.stderr)
+        print(f"[papersdl] Starting download (timeout={timeout}s)", file=sys.stderr)
         output_lines: list[str] = []
         process = subprocess.Popen(
             cmd,
@@ -142,17 +154,17 @@ def _read_output() -> None:
         except subprocess.TimeoutExpired:
             process.kill()
             reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
+            return False, None, f"Timed out after {timeout} seconds"
 
         reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
+        combined_output = "".join(output_lines)
 
         # Check if any PDF files were downloaded
-        pdf_files = list(output_dir.glob('*.pdf'))
+        pdf_files = list(output_dir.glob("*.pdf"))
 
         if pdf_files:
             # Return first PDF file
-            return True, str(pdf_files[0]), ''
+            return True, str(pdf_files[0]), ""
         else:
             stderr = combined_output
             stdout = combined_output
@@ -160,46 +172,49 @@ def _read_output() -> None:
             # These are NOT errors - page simply has no downloadable paper
             stderr_lower = stderr.lower()
             stdout_lower = stdout.lower()
-            if 'not found' in stderr_lower or 'not found' in stdout_lower:
-                return True, None, ''  # Paper not available - success, no output
-            if 'no results' in stderr_lower or 'no results' in stdout_lower:
-                return True, None, ''  # No paper found - success, no output
+            if "not found" in stderr_lower or "not found" in stdout_lower:
+                return True, None, ""  # Paper not available - success, no output
+            if "no results" in stderr_lower or "no results" in stdout_lower:
+                return True, None, ""  # No paper found - success, no output
             if process.returncode == 0:
-                return True, None, ''  # papers-dl exited cleanly, just no paper - success
+                return (
+                    True,
+                    None,
+                    "",
+                )  # papers-dl exited cleanly, just no paper - success
 
             # These ARE errors - something went wrong
-            if '404' in stderr or '404' in stdout:
-                return False, None, '404 Not Found'
-            if '403' in stderr or '403' in stdout:
-                return False, None, '403 Forbidden'
+            if "404" in stderr or "404" in stdout:
+                return False, None, "404 Not Found"
+            if "403" in stderr or "403" in stdout:
+                return False, None, "403 Forbidden"
 
-            return False, None, f'papers-dl error: {stderr[:200] or stdout[:200]}'
+            return False, None, f"papers-dl error: {stderr[:200] or stdout[:200]}"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to download paper from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to download paper from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Download scientific paper from a URL using papers-dl."""
 
     output = None
-    status = 'failed'
-    error = ''
+    error = ""
 
     try:
         # Check if papers-dl is enabled
-        if not get_env_bool('PAPERSDL_ENABLED', True):
-            print('Skipping papers-dl (PAPERSDL_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("PAPERSDL_ENABLED", True):
+            print("Skipping papers-dl (PAPERSDL_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('PAPERSDL_BINARY', 'papers-dl')
+        binary = get_env("PAPERSDL_BINARY", "papers-dl")
 
         # Run extraction
         success, output, error = save_paper(url, binary)
@@ -207,22 +222,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/papersdl/tests/test_papersdl.py b/abx_plugins/plugins/papersdl/tests/test_papersdl.py
index d26ef9c..9ba2326 100644
--- a/abx_plugins/plugins/papersdl/tests/test_papersdl.py
+++ b/abx_plugins/plugins/papersdl/tests/test_papersdl.py
@@ -12,6 +12,7 @@
 """
 
 import json
+import os
 import subprocess
 import sys
 import tempfile
@@ -21,66 +22,91 @@
 
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
-PAPERSDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_papersdl.*'), None)
-TEST_URL = 'https://example.com'
+_PAPERSDL_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_papersdl.*"), None)
+if _PAPERSDL_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+PAPERSDL_HOOK = _PAPERSDL_HOOK
+TEST_URL = "https://example.com"
 
 # Module-level cache for binary path
 _papersdl_binary_path = None
+_papersdl_install_error = None
+_papersdl_home_root = None
+
+
+def require_papersdl_binary() -> str:
+    """Return papers-dl binary path or fail with actionable context."""
+    binary_path = get_papersdl_binary_path()
+    assert binary_path, (
+        "papers-dl installation failed. Install hook must install the real papers-dl package "
+        f"from PyPI. {_papersdl_install_error or ''}".strip()
+    )
+    assert Path(binary_path).is_file(), f"papers-dl binary path invalid: {binary_path}"
+    return binary_path
+
 
 def get_papersdl_binary_path():
     """Get the installed papers-dl binary path from cache or by running installation."""
-    global _papersdl_binary_path
+    global _papersdl_binary_path, _papersdl_install_error, _papersdl_home_root
     if _papersdl_binary_path:
         return _papersdl_binary_path
 
-    # Try to find papers-dl binary using abx-pkg
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
-
-    try:
-        binary = Binary(
-            name='papers-dl',
-            binproviders=[PipProvider(), EnvProvider()]
-        ).load()
-
-        if binary and binary.abspath:
-            _papersdl_binary_path = str(binary.abspath)
-            return _papersdl_binary_path
-    except Exception:
-        pass
-
-    # If not found, try to install via pip
-    pip_hook = PLUGINS_ROOT / 'pip' / 'on_Binary__install_using_pip_provider.py'
-    if pip_hook.exists():
+    # Always validate installation path by running the real pip hook.
+    pip_hook = PLUGINS_ROOT / "pip" / "on_Binary__11_pip_install.py"
+    if pip_hook and pip_hook.exists():
         binary_id = str(uuid.uuid4())
         machine_id = str(uuid.uuid4())
+        if not _papersdl_home_root:
+            _papersdl_home_root = tempfile.mkdtemp(prefix="papersdl-lib-")
+
+        env = os.environ.copy()
+        env["HOME"] = str(_papersdl_home_root)
+        env["SNAP_DIR"] = str(Path(_papersdl_home_root) / "data")
+        env.pop("LIB_DIR", None)
 
         cmd = [
-            sys.executable, str(pip_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'papers-dl'
+            sys.executable,
+            str(pip_hook),
+            "--binary-id",
+            binary_id,
+            "--machine-id",
+            machine_id,
+            "--name",
+            "papers-dl",
         ]
 
         install_result = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
-            timeout=300
+            timeout=300,
+            env=env,
         )
 
         # Parse Binary from pip installation
-        for install_line in install_result.stdout.strip().split('\n'):
+        for install_line in install_result.stdout.strip().split("\n"):
             if install_line.strip():
                 try:
                     install_record = json.loads(install_line)
-                    if install_record.get('type') == 'Binary' and install_record.get('name') == 'papers-dl':
-                        _papersdl_binary_path = install_record.get('abspath')
+                    if (
+                        install_record.get("type") == "Binary"
+                        and install_record.get("name") == "papers-dl"
+                    ):
+                        _papersdl_binary_path = install_record.get("abspath")
                         return _papersdl_binary_path
                 except json.JSONDecodeError:
                     pass
+        _papersdl_install_error = (
+            f"pip hook failed with returncode={install_result.returncode}. "
+            f"stderr={install_result.stderr.strip()[:400]} "
+            f"stdout={install_result.stdout.strip()[:400]}"
+        )
+        return None
 
+    _papersdl_install_error = f"pip hook not found: {pip_hook}"
     return None
 
+
 def test_hook_script_exists():
     """Verify on_Snapshot hook exists."""
     assert PAPERSDL_HOOK.exists(), f"Hook not found: {PAPERSDL_HOOK}"
@@ -88,103 +114,193 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify papers-dl is installed by calling the REAL installation hooks."""
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "papers-dl must be installed successfully via install hook and pip provider"
-    assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
+    binary_path = require_papersdl_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
+    )
 
 
 def test_handles_non_paper_url():
     """Test that papers-dl extractor handles non-paper URLs gracefully via hook."""
-    import os
-
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "Binary must be installed for this test"
+    binary_path = require_papersdl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         env = os.environ.copy()
-        env['PAPERSDL_BINARY'] = binary_path
+        env["PAPERSDL_BINARY"] = binary_path
 
         # Run papers-dl extraction hook on non-paper URL
         result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(PAPERSDL_HOOK),
+                "--url",
+                "https://example.com",
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Should exit 0 even for non-paper URL
-        assert result.returncode == 0, f"Should handle non-paper URL gracefully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should handle non-paper URL gracefully: {result.stderr}"
+        )
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
 
 def test_config_save_papersdl_false_skips():
     """Test that PAPERSDL_ENABLED=False exits without emitting JSONL."""
-    import os
-
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['PAPERSDL_ENABLED'] = 'False'
+        env["PAPERSDL_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(PAPERSDL_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_config_timeout():
     """Test that PAPERSDL_TIMEOUT config is respected."""
-    import os
-
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "Binary must be installed for this test"
+    binary_path = require_papersdl_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['PAPERSDL_BINARY'] = binary_path
-        env['PAPERSDL_TIMEOUT'] = '5'
+        env["PAPERSDL_BINARY"] = binary_path
+        env["PAPERSDL_TIMEOUT"] = "5"
 
         result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(PAPERSDL_HOOK),
+                "--url",
+                "https://example.com",
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         assert result.returncode == 0, "Should complete without hanging"
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+
+def test_real_doi_download():
+    """Test that papers-dl downloads a real paper PDF from a DOI URL."""
+    binary_path = require_papersdl_binary()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Public DOI for an open-access arXiv paper.
+        doi_url = "https://doi.org/10.48550/arXiv.1706.03762"
+
+        env = os.environ.copy()
+        env["PAPERSDL_BINARY"] = binary_path
+        env["PAPERSDL_TIMEOUT"] = "120"
+        env["SNAP_DIR"] = str(tmpdir)
+
+        result = subprocess.run(
+            [
+                sys.executable,
+                str(PAPERSDL_HOOK),
+                "--url",
+                doi_url,
+                "--snapshot-id",
+                "testrealdoi",
+            ],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=180,
+        )
+
+        assert result.returncode == 0, f"DOI download should succeed: {result.stderr}"
+
+        result_json = None
+        for line in result.stdout.strip().split("\n"):
+            line = line.strip()
+            if line.startswith("{"):
+                try:
+                    record = json.loads(line)
+                    if record.get("type") == "ArchiveResult":
+                        result_json = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        assert result_json, f"Should emit ArchiveResult JSONL. stdout: {result.stdout}"
+        assert result_json.get("status") == "succeeded", (
+            f"DOI download should succeed: {result_json}"
+        )
+
+        output_str = (result_json.get("output_str") or "").strip()
+        assert output_str, (
+            f"ArchiveResult must include output path for DOI download: {result_json}"
+        )
+
+        output_path = Path(output_str)
+        assert output_path.is_file(), f"Downloaded paper path missing: {output_path}"
+        assert output_path.suffix.lower() == ".pdf", (
+            f"Downloaded paper must be a PDF: {output_path}"
+        )
+        assert output_path.stat().st_size > 0, f"Downloaded PDF is empty: {output_path}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js b/abx_plugins/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js
index b4d57d6..16454a5 100755
--- a/abx_plugins/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js
+++ b/abx_plugins/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js
@@ -23,6 +23,13 @@ const path = require('path');
 // Add NODE_MODULES_DIR to module resolution paths if set
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
+const {
+    getEnvBool,
+    getEnvInt,
+    parseArgs,
+    connectToPage,
+    waitForPageLoaded,
+} = require('../chrome/chrome_utils.js');
 
 // Extractor metadata
 const PLUGIN_NAME = 'parse_dom_outlinks';
@@ -37,80 +44,22 @@ const OUTPUT_FILE = 'outlinks.json';
 const URLS_FILE = 'urls.jsonl';  // For crawl system
 const CHROME_SESSION_DIR = '../chrome';
 
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-// Get environment variable with default
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-// Get CDP URL from chrome plugin
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
 // Extract outlinks
-async function extractOutlinks(url, snapshotId, crawlId, depth) {
+async function extractOutlinks(url, snapshotId, crawlId, depth, timeoutMs) {
     // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;
 
     try {
-        // Connect to existing Chrome session
-        const cdpUrl = getCdpUrl();
-        if (!cdpUrl) {
-            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
-        }
-
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
+        const connection = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs,
+            puppeteer,
         });
-
-        // Get the page
-        const pages = await browser.pages();
-        const page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            return { success: false, error: 'No page found in Chrome session' };
-        }
+        browser = connection.browser;
+        const page = connection.page;
+        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
 
         // Extract outlinks by category
         const outlinksData = await page.evaluate(() => {
@@ -249,17 +198,9 @@ async function main() {
             process.exit(0);
         }
 
-        // Check if Chrome session exists, then wait for page load
-        const cdpUrl = getCdpUrl();
-        if (cdpUrl) {
-            // Wait for page to be fully loaded
-            const pageLoaded = await waitForChromeTabLoaded(60000);
-            if (!pageLoaded) {
-                throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-            }
-        }
+        const timeoutMs = getEnvInt('PARSE_DOM_OUTLINKS_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
 
-        const result = await extractOutlinks(url, snapshotId, crawlId, depth);
+        const result = await extractOutlinks(url, snapshotId, crawlId, depth, timeoutMs);
 
         if (result.success) {
             status = 'succeeded';
diff --git a/abx_plugins/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py b/abx_plugins/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
index d1affe0..f08009a 100644
--- a/abx_plugins/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
+++ b/abx_plugins/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
@@ -13,18 +13,18 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
-    get_test_env,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
 )
 
 
 def chrome_available() -> bool:
     """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
+    for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
         if shutil.which(name):
             return True
     return False
@@ -32,7 +32,7 @@ def chrome_available() -> bool:
 
 # Get the path to the parse_dom_outlinks hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-OUTLINKS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_parse_dom_outlinks.*')
+OUTLINKS_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_parse_dom_outlinks.*")
 
 
 class TestParseDomOutlinksPlugin:
@@ -40,7 +40,9 @@ class TestParseDomOutlinksPlugin:
 
     def test_outlinks_hook_exists(self):
         """DOM outlinks hook script should exist."""
-        assert OUTLINKS_HOOK is not None, "DOM outlinks hook not found in plugin directory"
+        assert OUTLINKS_HOOK is not None, (
+            "DOM outlinks hook not found in plugin directory"
+        )
         assert OUTLINKS_HOOK.exists(), f"Hook not found: {OUTLINKS_HOOK}"
 
 
@@ -58,12 +60,12 @@ def teardown_method(self, _method=None):
     def test_outlinks_extracts_links_from_page(self, chrome_test_url):
         """DOM outlinks hook should extract and categorize links from page."""
         test_url = chrome_test_url
-        snapshot_id = 'test-outlinks-snapshot'
+        snapshot_id = "test-outlinks-snapshot"
 
         try:
             with chrome_session(
                 self.temp_dir,
-                crawl_id='test-outlinks-crawl',
+                crawl_id="test-outlinks-crawl",
                 snapshot_id=snapshot_id,
                 test_url=test_url,
                 navigate=True,
@@ -71,20 +73,24 @@ def test_outlinks_extracts_links_from_page(self, chrome_test_url):
             ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
                 # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
 
-
                 # Run outlinks hook with the active Chrome session
                 result = subprocess.run(
-                    ['node', str(OUTLINKS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(OUTLINKS_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(snapshot_chrome_dir),
                     capture_output=True,
                     text=True,
                     timeout=60,
-                    env=env
+                    env=env,
                 )
 
                 # Check for output file
-                snap_dir = Path(env['SNAP_DIR'])
-                outlinks_output = snap_dir / 'parse_dom_outlinks' / 'outlinks.json'
+                snap_dir = Path(env["SNAP_DIR"])
+                outlinks_output = snap_dir / "parse_dom_outlinks" / "outlinks.json"
 
                 outlinks_data = None
                 json_error = None
@@ -99,21 +105,21 @@ def test_outlinks_extracts_links_from_page(self, chrome_test_url):
 
                 # Verify hook ran successfully
                 assert result.returncode == 0, f"Hook failed: {result.stderr}"
-                assert 'Traceback' not in result.stderr
+                assert "Traceback" not in result.stderr
 
                 # Verify we got outlinks data with expected categories
                 assert outlinks_data is not None, (
                     f"No outlinks data found - file missing or invalid JSON: {json_error}"
                 )
 
-                assert 'url' in outlinks_data, f"Missing url: {outlinks_data}"
-                assert 'hrefs' in outlinks_data, f"Missing hrefs: {outlinks_data}"
+                assert "url" in outlinks_data, f"Missing url: {outlinks_data}"
+                assert "hrefs" in outlinks_data, f"Missing hrefs: {outlinks_data}"
                 # example.com has at least one link (to iana.org)
-                assert isinstance(outlinks_data['hrefs'], list)
+                assert isinstance(outlinks_data["hrefs"], list)
 
         except RuntimeError:
             raise
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py b/abx_plugins/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py
index 99707a1..7413cd4 100755
--- a/abx_plugins/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py
+++ b/abx_plugins/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py
@@ -25,7 +25,6 @@
 import os
 import re
 import sys
-from datetime import datetime, timezone
 from html import unescape
 from html.parser import HTMLParser
 from pathlib import Path
@@ -33,27 +32,27 @@
 
 import rich_click as click
 
-PLUGIN_NAME = 'parse_html_urls'
+PLUGIN_NAME = "parse_html_urls"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 # Check if parse_dom_outlinks extractor already ran (sibling plugin output dir)
-DOM_OUTLINKS_URLS_FILE = Path('..') / 'parse_dom_outlinks' / 'urls.jsonl'
-URLS_FILE = Path('urls.jsonl')
+DOM_OUTLINKS_URLS_FILE = Path("..") / "parse_dom_outlinks" / "urls.jsonl"
+URLS_FILE = Path("urls.jsonl")
 
 
 # URL regex from archivebox/misc/util.py
 URL_REGEX = re.compile(
-    r'(?=('
-    r'http[s]?://'
-    r'(?:[a-zA-Z]|[0-9]'
-    r'|[-_$@.&+!*\(\),]'
-    r'|[^\u0000-\u007F])+'
+    r"(?=("
+    r"http[s]?://"
+    r"(?:[a-zA-Z]|[0-9]"
+    r"|[-_$@.&+!*\(\),]"
+    r"|[^\u0000-\u007F])+"
     r'[^\]\[<>"\'\s]+'
-    r'))',
+    r"))",
     re.IGNORECASE | re.UNICODE,
 )
 
@@ -66,23 +65,25 @@ def __init__(self):
         self.urls = []
 
     def handle_starttag(self, tag, attrs):
-        if tag == 'a':
+        if tag == "a":
             for attr, value in attrs:
-                if attr == 'href' and value:
+                if attr == "href" and value:
                     self.urls.append(value)
 
 
 def did_urljoin_misbehave(root_url: str, relative_path: str, final_url: str) -> bool:
     """Check if urljoin incorrectly stripped // from sub-URLs."""
     relative_path = relative_path.lower()
-    if relative_path.startswith('http://') or relative_path.startswith('https://'):
-        relative_path = relative_path.split('://', 1)[-1]
+    if relative_path.startswith("http://") or relative_path.startswith("https://"):
+        relative_path = relative_path.split("://", 1)[-1]
 
-    original_path_had_suburl = '://' in relative_path
-    original_root_had_suburl = '://' in root_url[8:]
-    final_joined_has_suburl = '://' in final_url[8:]
+    original_path_had_suburl = "://" in relative_path
+    original_root_had_suburl = "://" in root_url[8:]
+    final_joined_has_suburl = "://" in final_url[8:]
 
-    return (original_root_had_suburl or original_path_had_suburl) and not final_joined_has_suburl
+    return (
+        original_root_had_suburl or original_path_had_suburl
+    ) and not final_joined_has_suburl
 
 
 def fix_urljoin_bug(url: str, nesting_limit=5) -> str:
@@ -90,11 +91,11 @@ def fix_urljoin_bug(url: str, nesting_limit=5) -> str:
     input_url = url
     for _ in range(nesting_limit):
         url = re.sub(
-            r'(?P<root>.+?)'
-            r'(?P<separator>[-=/_&+%$#@!*\(\\])'
-            r'(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/'
-            r'(?P<suburl>[^/\\]+)',
-            r'\1\2\3://\4',
+            r"(?P<root>.+?)"
+            r"(?P<separator>[-=/_&+%$#@!*\(\\])"
+            r"(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/"
+            r"(?P<suburl>[^/\\]+)",
+            r"\1\2\3://\4",
             input_url,
             re.IGNORECASE | re.UNICODE,
         )
@@ -104,13 +105,15 @@ def fix_urljoin_bug(url: str, nesting_limit=5) -> str:
     return url
 
 
-def normalize_url(url: str, root_url: str = None) -> str:
+def normalize_url(url: str, root_url: str | None = None) -> str:
     """Normalize a URL, resolving relative paths if root_url provided."""
     url = clean_url_candidate(url)
     if not root_url:
         return _normalize_trailing_slash(url)
 
-    url_is_absolute = url.lower().startswith('http://') or url.lower().startswith('https://')
+    url_is_absolute = url.lower().startswith("http://") or url.lower().startswith(
+        "https://"
+    )
 
     if url_is_absolute:
         return url
@@ -129,10 +132,24 @@ def _normalize_trailing_slash(url: str) -> str:
     """Drop trailing slash for non-root paths when no query/fragment."""
     try:
         parsed = urlparse(url)
-        path = parsed.path or ''
-        if path != '/' and path.endswith('/') and not parsed.query and not parsed.fragment:
-            path = path.rstrip('/')
-            return urlunparse((parsed.scheme, parsed.netloc, path, parsed.params, parsed.query, parsed.fragment))
+        path = parsed.path or ""
+        if (
+            path != "/"
+            and path.endswith("/")
+            and not parsed.query
+            and not parsed.fragment
+        ):
+            path = path.rstrip("/")
+            return urlunparse(
+                (
+                    parsed.scheme,
+                    parsed.netloc,
+                    path,
+                    parsed.params,
+                    parsed.query,
+                    parsed.fragment,
+                )
+            )
     except Exception:
         pass
     return url
@@ -140,16 +157,16 @@ def _normalize_trailing_slash(url: str) -> str:
 
 def clean_url_candidate(url: str) -> str:
     """Strip obvious surrounding/trailing punctuation from extracted URLs."""
-    cleaned = (url or '').strip()
+    cleaned = (url or "").strip()
     if not cleaned:
         return cleaned
 
     # Strip common wrappers
-    cleaned = cleaned.strip(' \t\r\n')
-    cleaned = cleaned.strip('"\''"'"'<>[]()')
+    cleaned = cleaned.strip(" \t\r\n")
+    cleaned = cleaned.strip("\"''<>[]()")
 
     # Strip trailing punctuation and escape artifacts
-    cleaned = cleaned.rstrip('.,;:!?)\\\'"')
+    cleaned = cleaned.rstrip(".,;:!?)\\'\"")
     cleaned = cleaned.rstrip('"')
 
     # Strip leading punctuation artifacts
@@ -162,41 +179,44 @@ def fetch_content(url: str) -> str:
     """Fetch content from a URL (supports file:// and https://)."""
     parsed = urlparse(url)
 
-    if parsed.scheme == 'file':
+    if parsed.scheme == "file":
         file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+        timeout = int(os.environ.get("TIMEOUT", "60"))
+        user_agent = os.environ.get(
+            "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+        )
 
         import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
+
+        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
         with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
+            return response.read().decode("utf-8", errors="replace")
 
 
 def find_html_sources() -> list[str]:
     """Find HTML content from other extractors in the snapshot directory."""
     search_patterns = [
-        'readability/content.html',
-        '*_readability/content.html',
-        'mercury/content.html',
-        '*_mercury/content.html',
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
-        'wget/**/*.htm*',
-        '*_wget/**/*.htm*',
+        "readability/content.html",
+        "*_readability/content.html",
+        "mercury/content.html",
+        "*_mercury/content.html",
+        "singlefile/singlefile.html",
+        "*_singlefile/singlefile.html",
+        "singlefile/*.html",
+        "*_singlefile/*.html",
+        "dom/output.html",
+        "*_dom/output.html",
+        "dom/*.html",
+        "*_dom/*.html",
+        "wget/**/*.html",
+        "*_wget/**/*.html",
+        "wget/**/*.htm",
+        "*_wget/**/*.htm",
+        "wget/**/*.htm*",
+        "*_wget/**/*.htm*",
     ]
 
     sources: list[str] = []
@@ -206,7 +226,7 @@ def find_html_sources() -> list[str]:
                 if not match.is_file() or match.stat().st_size == 0:
                     continue
                 try:
-                    sources.append(match.read_text(errors='ignore'))
+                    sources.append(match.read_text(errors="ignore"))
                 except Exception:
                     continue
 
@@ -214,24 +234,31 @@ def find_html_sources() -> list[str]:
 
 
 @click.command()
-@click.option('--url', required=True, help='HTML URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
+@click.option("--url", required=True, help="HTML URL to parse")
+@click.option("--snapshot-id", required=False, help="Parent Snapshot UUID")
+@click.option("--crawl-id", required=False, help="Crawl UUID")
+@click.option("--depth", type=int, default=0, help="Current depth level")
+def main(
+    url: str,
+    snapshot_id: str | None = None,
+    crawl_id: str | None = None,
+    depth: int = 0,
+):
     """Parse HTML and extract href URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
+    env_depth = os.environ.get("SNAPSHOT_DEPTH")
     if env_depth is not None:
         try:
             depth = int(env_depth)
         except Exception:
             pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
+    crawl_id = crawl_id or os.environ.get("CRAWL_ID")
 
     # Skip only if parse_dom_outlinks already ran AND found URLs (it uses Chrome for better coverage)
     # If parse_dom_outlinks ran but found nothing, we still try static HTML parsing as fallback
     if DOM_OUTLINKS_URLS_FILE.exists() and DOM_OUTLINKS_URLS_FILE.stat().st_size > 0:
-        click.echo(f'Skipping parse_html_urls - parse_dom_outlinks already extracted URLs')
+        click.echo(
+            "Skipping parse_html_urls - parse_dom_outlinks already extracted URLs"
+        )
         sys.exit(0)
 
     contents = find_html_sources()
@@ -239,7 +266,7 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
         try:
             contents = [fetch_content(url)]
         except Exception as e:
-            click.echo(f'Failed to fetch {url}: {e}', err=True)
+            click.echo(f"Failed to fetch {url}: {e}", err=True)
             sys.exit(1)
 
     urls_found = set()
@@ -253,14 +280,18 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
 
         for href in parser.urls:
             normalized = normalize_url(href, root_url=url)
-            if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'):
+            if normalized.lower().startswith(
+                "http://"
+            ) or normalized.lower().startswith("https://"):
                 if normalized != url:
                     urls_found.add(unescape(normalized))
 
         # Also capture explicit URLs in the HTML text
         for match in URL_REGEX.findall(content):
             normalized = normalize_url(match, root_url=url)
-            if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'):
+            if normalized.lower().startswith(
+                "http://"
+            ) or normalized.lower().startswith("https://"):
                 if normalized != url:
                     urls_found.add(unescape(normalized))
 
@@ -268,28 +299,30 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     records = []
     for found_url in sorted(urls_found):
         record = {
-            'type': 'Snapshot',
-            'url': found_url,
-            'plugin': PLUGIN_NAME,
-            'depth': depth + 1,
+            "type": "Snapshot",
+            "url": found_url,
+            "plugin": PLUGIN_NAME,
+            "depth": depth + 1,
         }
         if snapshot_id:
-            record['parent_snapshot_id'] = snapshot_id
+            record["parent_snapshot_id"] = snapshot_id
         if crawl_id:
-            record['crawl_id'] = crawl_id
+            record["crawl_id"] = crawl_id
 
         records.append(record)
         print(json.dumps(record))
 
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in records) + ('\n' if records else ''))
+    URLS_FILE.write_text(
+        "\n".join(json.dumps(r) for r in records) + ("\n" if records else "")
+    )
 
     # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
+    status = "succeeded" if urls_found else "skipped"
     output_str = URLS_FILE.name
     ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output_str,
     }
     print(json.dumps(ar_record))
 
@@ -297,5 +330,5 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/parse_html_urls/tests/test_parse_html_urls.py b/abx_plugins/plugins/parse_html_urls/tests/test_parse_html_urls.py
index d206f12..5b522f0 100644
--- a/abx_plugins/plugins/parse_html_urls/tests/test_parse_html_urls.py
+++ b/abx_plugins/plugins/parse_html_urls/tests/test_parse_html_urls.py
@@ -10,7 +10,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_html_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_html_urls.*"), None)
 
 
 class TestParseHtmlUrls:
@@ -19,9 +19,9 @@ class TestParseHtmlUrls:
     def test_parses_real_example_com(self, tmp_path):
         """Test parsing real https://example.com and extracting its links."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'https://example.com'],
+            [sys.executable, str(SCRIPT_PATH), "--url", "https://example.com"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -33,16 +33,20 @@ def test_parses_real_example_com(self, tmp_path):
 
         # Verify stdout contains JSONL records for discovered URLs
         # example.com links to iana.org
-        assert 'iana.org' in result.stdout or 'example' in result.stdout, "Expected links from example.com not found"
+        assert "iana.org" in result.stdout or "example" in result.stdout, (
+            "Expected links from example.com not found"
+        )
 
         # Verify ArchiveResult record is present
-        assert '"type": "ArchiveResult"' in result.stdout, "Missing ArchiveResult record"
+        assert '"type": "ArchiveResult"' in result.stdout, (
+            "Missing ArchiveResult record"
+        )
         assert '"status": "succeeded"' in result.stdout, "Missing success status"
 
     def test_extracts_href_urls(self, tmp_path):
         """Test extracting URLs from anchor tags."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
+        input_file = tmp_path / "page.html"
+        input_file.write_text("""
 <!DOCTYPE html>
 <html>
 <body>
@@ -51,12 +55,12 @@ def test_extracts_href_urls(self, tmp_path):
     <a href="http://test.org">Test</a>
 </body>
 </html>
-        ''')
+        """)
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -64,36 +68,44 @@ def test_extracts_href_urls(self, tmp_path):
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
 
         # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 3, f"Expected 3 Snapshot records, got {len(lines)}"
 
         urls = set()
         for line in lines:
             entry = json.loads(line)
-            assert entry['type'] == 'Snapshot'
-            assert 'url' in entry
-            urls.add(entry['url'])
+            assert entry["type"] == "Snapshot"
+            assert "url" in entry
+            urls.add(entry["url"])
 
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'http://test.org' in urls
+        assert "https://example.com" in urls
+        assert "https://foo.bar/page" in urls
+        assert "http://test.org" in urls
 
         # Verify ArchiveResult record
         assert '"type": "ArchiveResult"' in result.stdout
         assert '"status": "succeeded"' in result.stdout
 
-        urls_file = tmp_path / 'parse_html_urls' / 'urls.jsonl'
+        urls_file = tmp_path / "parse_html_urls" / "urls.jsonl"
         assert urls_file.exists(), "urls.jsonl not created"
-        file_lines = [line for line in urls_file.read_text().splitlines() if line.strip()]
-        assert len(file_lines) == 3, f"Expected 3 urls.jsonl entries, got {len(file_lines)}"
+        file_lines = [
+            line for line in urls_file.read_text().splitlines() if line.strip()
+        ]
+        assert len(file_lines) == 3, (
+            f"Expected 3 urls.jsonl entries, got {len(file_lines)}"
+        )
 
     def test_ignores_non_http_schemes(self, tmp_path):
         """Test that non-http schemes are ignored."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
+        input_file = tmp_path / "page.html"
+        input_file.write_text("""
 <html>
 <body>
     <a href="mailto:test@example.com">Email</a>
@@ -102,12 +114,12 @@ def test_ignores_non_http_schemes(self, tmp_path):
     <a href="https://valid.com">Valid</a>
 </body>
 </html>
-        ''')
+        """)
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -117,27 +129,31 @@ def test_ignores_non_http_schemes(self, tmp_path):
         assert result.returncode == 0
 
         # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 1, f"Expected 1 Snapshot record, got {len(lines)}"
 
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://valid.com'
+        assert entry["url"] == "https://valid.com"
 
     def test_handles_html_entities(self, tmp_path):
         """Test that HTML entities in URLs are decoded."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
+        input_file = tmp_path / "page.html"
+        input_file.write_text("""
 <html>
 <body>
     <a href="https://example.com/page?a=1&amp;b=2">Link</a>
 </body>
 </html>
-        ''')
+        """)
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -145,14 +161,18 @@ def test_handles_html_entities(self, tmp_path):
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
+        assert entry["url"] == "https://example.com/page?a=1&b=2"
 
     def test_deduplicates_urls(self, tmp_path):
         """Test that duplicate URLs are deduplicated."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
+        input_file = tmp_path / "page.html"
+        input_file.write_text("""
 <html>
 <body>
     <a href="https://example.com">Link 1</a>
@@ -160,12 +180,12 @@ def test_deduplicates_urls(self, tmp_path):
     <a href="https://example.com">Link 3</a>
 </body>
 </html>
-        ''')
+        """)
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -173,13 +193,17 @@ def test_deduplicates_urls(self, tmp_path):
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 1
 
     def test_excludes_source_url(self, tmp_path):
         """Test that the source URL itself is excluded from results."""
-        input_file = tmp_path / 'page.html'
-        source_url = f'file://{input_file}'
+        input_file = tmp_path / "page.html"
+        source_url = f"file://{input_file}"
         input_file.write_text(f'''
 <html>
 <body>
@@ -190,27 +214,31 @@ def test_excludes_source_url(self, tmp_path):
         ''')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', source_url],
+            [sys.executable, str(SCRIPT_PATH), "--url", source_url],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 1
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://other.com'
+        assert entry["url"] == "https://other.com"
 
     def test_skips_when_no_urls_found(self, tmp_path):
         """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('<html><body>No links here</body></html>')
+        input_file = tmp_path / "page.html"
+        input_file.write_text("<html><body>No links here</body></html>")
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmp_path)
+        env["SNAP_DIR"] = str(tmp_path)
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -218,50 +246,58 @@ def test_skips_when_no_urls_found(self, tmp_path):
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
         assert '"status": "skipped"' in result.stdout
 
     def test_handles_malformed_html(self, tmp_path):
         """Test handling of malformed HTML."""
-        input_file = tmp_path / 'malformed.html'
-        input_file.write_text('''
+        input_file = tmp_path / "malformed.html"
+        input_file.write_text("""
 <html>
 <body>
     <a href="https://example.com">Unclosed tag
     <a href="https://other.com">Another link</a>
 </body>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
     def test_output_is_valid_json(self, tmp_path):
         """Test that output contains required fields."""
-        input_file = tmp_path / 'page.html'
+        input_file = tmp_path / "page.html"
         input_file.write_text('<a href="https://example.com">Link</a>')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['type'] == 'Snapshot'
-        assert entry['plugin'] == 'parse_html_urls'
+        assert entry["url"] == "https://example.com"
+        assert entry["type"] == "Snapshot"
+        assert entry["plugin"] == "parse_html_urls"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py b/abx_plugins/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py
index 1a80336..21c6e09 100755
--- a/abx_plugins/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py
+++ b/abx_plugins/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py
@@ -31,13 +31,13 @@
 
 import rich_click as click
 
-PLUGIN_NAME = 'parse_jsonl_urls'
+PLUGIN_NAME = "parse_jsonl_urls"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-URLS_FILE = Path('urls.jsonl')
+URLS_FILE = Path("urls.jsonl")
 
 
 def parse_bookmarked_at(link: dict) -> str | None:
@@ -46,7 +46,7 @@ def parse_bookmarked_at(link: dict) -> str | None:
 
     def json_date(s: str) -> datetime:
         # Try ISO 8601 format
-        return datetime.strptime(s.split(',', 1)[0], '%Y-%m-%dT%H:%M:%S%z')
+        return datetime.strptime(s.split(",", 1)[0], "%Y-%m-%dT%H:%M:%S%z")
 
     def to_iso(dt: datetime) -> str:
         if dt.tzinfo is None:
@@ -54,24 +54,26 @@ def to_iso(dt: datetime) -> str:
         return dt.isoformat()
 
     try:
-        if link.get('bookmarked_at'):
+        if link.get("bookmarked_at"):
             # Already in our format, pass through
-            return link['bookmarked_at']
-        elif link.get('timestamp'):
+            return link["bookmarked_at"]
+        elif link.get("timestamp"):
             # Chrome/Firefox histories use microseconds
-            return to_iso(datetime.fromtimestamp(link['timestamp'] / 1000000, tz=timezone.utc))
-        elif link.get('time'):
-            return to_iso(json_date(link['time']))
-        elif link.get('created_at'):
-            return to_iso(json_date(link['created_at']))
-        elif link.get('created'):
-            return to_iso(json_date(link['created']))
-        elif link.get('date'):
-            return to_iso(json_date(link['date']))
-        elif link.get('bookmarked'):
-            return to_iso(json_date(link['bookmarked']))
-        elif link.get('saved'):
-            return to_iso(json_date(link['saved']))
+            return to_iso(
+                datetime.fromtimestamp(link["timestamp"] / 1000000, tz=timezone.utc)
+            )
+        elif link.get("time"):
+            return to_iso(json_date(link["time"]))
+        elif link.get("created_at"):
+            return to_iso(json_date(link["created_at"]))
+        elif link.get("created"):
+            return to_iso(json_date(link["created"]))
+        elif link.get("date"):
+            return to_iso(json_date(link["date"]))
+        elif link.get("bookmarked"):
+            return to_iso(json_date(link["bookmarked"]))
+        elif link.get("saved"):
+            return to_iso(json_date(link["saved"]))
     except (ValueError, TypeError, KeyError):
         pass
 
@@ -81,41 +83,41 @@ def to_iso(dt: datetime) -> str:
 def json_object_to_entry(link: dict) -> dict | None:
     """Convert a JSON bookmark object to a URL entry."""
     # Parse URL (try various field names)
-    url = link.get('href') or link.get('url') or link.get('URL')
+    url = link.get("href") or link.get("url") or link.get("URL")
     if not url:
         return None
 
     entry = {
-        'type': 'Snapshot',
-        'url': unescape(url),
-        'plugin': PLUGIN_NAME,
+        "type": "Snapshot",
+        "url": unescape(url),
+        "plugin": PLUGIN_NAME,
     }
 
     # Parse title
     title = None
-    if link.get('title'):
-        title = link['title'].strip()
-    elif link.get('description'):
-        title = link['description'].replace(' — Readability', '').strip()
-    elif link.get('name'):
-        title = link['name'].strip()
+    if link.get("title"):
+        title = link["title"].strip()
+    elif link.get("description"):
+        title = link["description"].replace(" — Readability", "").strip()
+    elif link.get("name"):
+        title = link["name"].strip()
     if title:
-        entry['title'] = unescape(title)
+        entry["title"] = unescape(title)
 
     # Parse bookmarked_at (ISO 8601)
     bookmarked_at = parse_bookmarked_at(link)
     if bookmarked_at:
-        entry['bookmarked_at'] = bookmarked_at
+        entry["bookmarked_at"] = bookmarked_at
 
     # Parse tags
-    tags = link.get('tags', '')
+    tags = link.get("tags", "")
     if isinstance(tags, list):
-        tags = ','.join(tags)
-    elif isinstance(tags, str) and ',' not in tags and tags:
+        tags = ",".join(tags)
+    elif isinstance(tags, str) and "," not in tags and tags:
         # If no comma, assume space-separated
-        tags = tags.replace(' ', ',')
+        tags = tags.replace(" ", ",")
     if tags:
-        entry['tags'] = unescape(tags)
+        entry["tags"] = unescape(tags)
 
     return entry
 
@@ -124,39 +126,47 @@ def fetch_content(url: str) -> str:
     """Fetch content from a URL (supports file:// and https://)."""
     parsed = urlparse(url)
 
-    if parsed.scheme == 'file':
+    if parsed.scheme == "file":
         file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+        timeout = int(os.environ.get("TIMEOUT", "60"))
+        user_agent = os.environ.get(
+            "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+        )
 
         import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
+
+        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
         with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
+            return response.read().decode("utf-8", errors="replace")
 
 
 @click.command()
-@click.option('--url', required=True, help='JSONL file URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
+@click.option("--url", required=True, help="JSONL file URL to parse")
+@click.option("--snapshot-id", required=False, help="Parent Snapshot UUID")
+@click.option("--crawl-id", required=False, help="Crawl UUID")
+@click.option("--depth", type=int, default=0, help="Current depth level")
+def main(
+    url: str,
+    snapshot_id: str | None = None,
+    crawl_id: str | None = None,
+    depth: int = 0,
+):
     """Parse JSONL bookmark file and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
+    env_depth = os.environ.get("SNAPSHOT_DEPTH")
     if env_depth is not None:
         try:
             depth = int(env_depth)
         except Exception:
             pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
+    crawl_id = crawl_id or os.environ.get("CRAWL_ID")
 
     try:
         content = fetch_content(url)
     except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
+        click.echo(f"Failed to fetch {url}: {e}", err=True)
         sys.exit(1)
 
     urls_found = []
@@ -172,15 +182,15 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
             entry = json_object_to_entry(link)
             if entry:
                 # Add crawl tracking metadata
-                entry['depth'] = depth + 1
+                entry["depth"] = depth + 1
                 if snapshot_id:
-                    entry['parent_snapshot_id'] = snapshot_id
+                    entry["parent_snapshot_id"] = snapshot_id
                 if crawl_id:
-                    entry['crawl_id'] = crawl_id
+                    entry["crawl_id"] = crawl_id
 
                 # Collect tags
-                if entry.get('tags'):
-                    for tag in entry['tags'].split(','):
+                if entry.get("tags"):
+                    for tag in entry["tags"].split(","):
                         tag = tag.strip()
                         if tag:
                             all_tags.add(tag)
@@ -192,25 +202,31 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
 
     # Emit Tag records first (to stdout as JSONL)
     for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
+        print(
+            json.dumps(
+                {
+                    "type": "Tag",
+                    "name": tag_name,
+                }
+            )
+        )
 
     # Emit Snapshot records (to stdout as JSONL)
     for entry in urls_found:
         print(json.dumps(entry))
 
     # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
+    URLS_FILE.write_text(
+        "\n".join(json.dumps(r) for r in urls_found) + ("\n" if urls_found else "")
+    )
 
     # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
+    status = "succeeded" if urls_found else "skipped"
     output_str = URLS_FILE.name
     ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output_str,
     }
     print(json.dumps(ar_record))
 
@@ -218,5 +234,5 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py b/abx_plugins/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py
index b425d3f..ec8a452 100644
--- a/abx_plugins/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py
+++ b/abx_plugins/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py
@@ -9,7 +9,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_jsonl_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_jsonl_urls.*"), None)
 
 
 class TestParseJsonlUrls:
@@ -17,7 +17,7 @@ class TestParseJsonlUrls:
 
     def test_extracts_urls_from_jsonl(self, tmp_path):
         """Test extracting URLs from JSONL bookmark file."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text(
             '{"url": "https://example.com", "title": "Example"}\n'
             '{"url": "https://foo.bar/page", "title": "Foo Bar"}\n'
@@ -25,37 +25,41 @@ def test_extracts_urls_from_jsonl(self, tmp_path):
         )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
+        assert "urls.jsonl" in result.stderr or "urls.jsonl" in result.stdout
 
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 3
 
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
+        urls = {e["url"] for e in entries}
+        titles = {e.get("title") for e in entries}
 
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'https://test.org' in urls
-        assert 'Example' in titles
-        assert 'Foo Bar' in titles
-        assert 'Test Org' in titles
+        assert "https://example.com" in urls
+        assert "https://foo.bar/page" in urls
+        assert "https://test.org" in urls
+        assert "Example" in titles
+        assert "Foo Bar" in titles
+        assert "Test Org" in titles
 
     def test_supports_href_field(self, tmp_path):
         """Test that 'href' field is recognized as URL."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text('{"href": "https://example.com", "title": "Test"}\n')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -63,17 +67,23 @@ def test_supports_href_field(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
+        assert entry["url"] == "https://example.com"
 
     def test_supports_description_as_title(self, tmp_path):
         """Test that 'description' field is used as title fallback."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "description": "A description"}\n')
+        input_file = tmp_path / "bookmarks.jsonl"
+        input_file.write_text(
+            '{"url": "https://example.com", "description": "A description"}\n'
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -81,17 +91,23 @@ def test_supports_description_as_title(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['title'] == 'A description'
+        assert entry["title"] == "A description"
 
     def test_parses_various_timestamp_formats(self, tmp_path):
         """Test parsing of different timestamp field names."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "timestamp": 1609459200000000}\n')
+        input_file = tmp_path / "bookmarks.jsonl"
+        input_file.write_text(
+            '{"url": "https://example.com", "timestamp": 1609459200000000}\n'
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -99,18 +115,24 @@ def test_parses_various_timestamp_formats(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
         # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
+        assert "bookmarked_at" in entry
 
     def test_parses_tags_as_string(self, tmp_path):
         """Test parsing tags as comma-separated string."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "tags": "tech,news,reading"}\n')
+        input_file = tmp_path / "bookmarks.jsonl"
+        input_file.write_text(
+            '{"url": "https://example.com", "tags": "tech,news,reading"}\n'
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -120,15 +142,17 @@ def test_parses_tags_as_string(self, tmp_path):
         # Output goes to stdout (JSONL)
         # Parser converts tags to separate Tag objects in the output
         content = result.stdout
-        assert 'tech' in content or 'news' in content or 'Tag' in content
+        assert "tech" in content or "news" in content or "Tag" in content
 
     def test_parses_tags_as_list(self, tmp_path):
         """Test parsing tags as JSON array."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "tags": ["tech", "news"]}\n')
+        input_file = tmp_path / "bookmarks.jsonl"
+        input_file.write_text(
+            '{"url": "https://example.com", "tags": ["tech", "news"]}\n'
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -138,19 +162,19 @@ def test_parses_tags_as_list(self, tmp_path):
         # Output goes to stdout (JSONL)
         # Parser converts tags to separate Tag objects in the output
         content = result.stdout
-        assert 'tech' in content or 'news' in content or 'Tag' in content
+        assert "tech" in content or "news" in content or "Tag" in content
 
     def test_skips_malformed_lines(self, tmp_path):
         """Test that malformed JSON lines are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text(
             '{"url": "https://valid.com"}\n'
-            'not valid json\n'
+            "not valid json\n"
             '{"url": "https://also-valid.com"}\n'
         )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -158,12 +182,16 @@ def test_skips_malformed_lines(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
     def test_skips_entries_without_url(self, tmp_path):
         """Test that entries without URL field are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text(
             '{"url": "https://valid.com"}\n'
             '{"title": "No URL here"}\n'
@@ -171,7 +199,7 @@ def test_skips_entries_without_url(self, tmp_path):
         )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -179,44 +207,55 @@ def test_skips_entries_without_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
     def test_skips_when_no_urls_found(self, tmp_path):
         """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'empty.jsonl'
+        input_file = tmp_path / "empty.jsonl"
         input_file.write_text('{"title": "No URL"}\n')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
         assert '"status": "skipped"' in result.stdout
 
     def test_exits_1_when_file_not_found(self, tmp_path):
         """Test that script exits with code 1 when file doesn't exist."""
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/bookmarks.jsonl'],
+            [
+                sys.executable,
+                str(SCRIPT_PATH),
+                "--url",
+                "file:///nonexistent/bookmarks.jsonl",
+            ],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
+        assert "Failed to fetch" in result.stderr
 
     def test_handles_html_entities(self, tmp_path):
         """Test that HTML entities in URLs and titles are decoded."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com/page?a=1&amp;b=2", "title": "Test &amp; Title"}\n')
+        input_file = tmp_path / "bookmarks.jsonl"
+        input_file.write_text(
+            '{"url": "https://example.com/page?a=1&amp;b=2", "title": "Test &amp; Title"}\n'
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -224,23 +263,24 @@ def test_handles_html_entities(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-        assert entry['title'] == 'Test & Title'
+        assert entry["url"] == "https://example.com/page?a=1&b=2"
+        assert entry["title"] == "Test & Title"
 
     def test_skips_empty_lines(self, tmp_path):
         """Test that empty lines are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text(
-            '{"url": "https://example.com"}\n'
-            '\n'
-            '   \n'
-            '{"url": "https://other.com"}\n'
+            '{"url": "https://example.com"}\n\n   \n{"url": "https://other.com"}\n'
         )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -248,16 +288,20 @@ def test_skips_empty_lines(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
     def test_output_includes_required_fields(self, tmp_path):
         """Test that output includes required fields."""
-        input_file = tmp_path / 'bookmarks.jsonl'
+        input_file = tmp_path / "bookmarks.jsonl"
         input_file.write_text('{"url": "https://example.com"}\n')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -265,12 +309,16 @@ def test_output_includes_required_fields(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert 'type' in entry
-        assert 'plugin' in entry
+        assert entry["url"] == "https://example.com"
+        assert "type" in entry
+        assert "plugin" in entry
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py b/abx_plugins/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py
index 05d9fd8..c15849c 100755
--- a/abx_plugins/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py
+++ b/abx_plugins/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py
@@ -29,13 +29,13 @@
 
 import rich_click as click
 
-PLUGIN_NAME = 'parse_netscape_urls'
+PLUGIN_NAME = "parse_netscape_urls"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-URLS_FILE = Path('urls.jsonl')
+URLS_FILE = Path("urls.jsonl")
 
 # Constants for timestamp epoch detection
 UNIX_EPOCH = 0  # 1970-01-01 00:00:00 UTC
@@ -50,7 +50,7 @@
 # Make ADD_DATE optional and allow negative numbers
 NETSCAPE_PATTERN = re.compile(
     r'<a\s+href="([^"]+)"(?:\s+add_date="([^"]*)")?(?:\s+[^>]*?tags="([^"]*)")?[^>]*>([^<]+)</a>',
-    re.UNICODE | re.IGNORECASE
+    re.UNICODE | re.IGNORECASE,
 )
 
 
@@ -69,7 +69,7 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
     2. Pick the one that yields a reasonable date (1995-2035)
     3. Prioritize more common formats (Unix seconds, then Mac seconds, etc.)
     """
-    if not timestamp_str or timestamp_str == '':
+    if not timestamp_str or timestamp_str == "":
         return None
 
     try:
@@ -78,7 +78,6 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
         return None
 
     # Detect sign and work with absolute value
-    is_negative = timestamp_num < 0
     abs_timestamp = abs(timestamp_num)
 
     # Determine number of digits to guess the unit
@@ -95,7 +94,7 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
         try:
             dt = datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_seconds', 100))  # Highest priority
+                candidates.append((dt, "unix_seconds", 100))  # Highest priority
         except (ValueError, OSError, OverflowError):
             pass
 
@@ -103,9 +102,11 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
     # Only consider if Unix seconds didn't work or gave unreasonable date
     if 8 <= num_digits <= 11:
         try:
-            dt = datetime.fromtimestamp(timestamp_num + MAC_COCOA_EPOCH, tz=timezone.utc)
+            dt = datetime.fromtimestamp(
+                timestamp_num + MAC_COCOA_EPOCH, tz=timezone.utc
+            )
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_seconds', 90))
+                candidates.append((dt, "mac_seconds", 90))
         except (ValueError, OSError, OverflowError):
             pass
 
@@ -114,16 +115,18 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
         try:
             dt = datetime.fromtimestamp(timestamp_num / 1000, tz=timezone.utc)
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_milliseconds', 95))
+                candidates.append((dt, "unix_milliseconds", 95))
         except (ValueError, OSError, OverflowError):
             pass
 
     # Mac/Cocoa epoch milliseconds (12-13 digits) - Rare
     if 11 <= num_digits <= 14:
         try:
-            dt = datetime.fromtimestamp((timestamp_num / 1000) + MAC_COCOA_EPOCH, tz=timezone.utc)
+            dt = datetime.fromtimestamp(
+                (timestamp_num / 1000) + MAC_COCOA_EPOCH, tz=timezone.utc
+            )
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_milliseconds', 85))
+                candidates.append((dt, "mac_milliseconds", 85))
         except (ValueError, OSError, OverflowError):
             pass
 
@@ -132,16 +135,18 @@ def parse_timestamp(timestamp_str: str) -> datetime | None:
         try:
             dt = datetime.fromtimestamp(timestamp_num / 1_000_000, tz=timezone.utc)
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_microseconds', 98))
+                candidates.append((dt, "unix_microseconds", 98))
         except (ValueError, OSError, OverflowError):
             pass
 
     # Mac/Cocoa epoch microseconds (15-16 digits) - Very rare
     if 14 <= num_digits <= 18:
         try:
-            dt = datetime.fromtimestamp((timestamp_num / 1_000_000) + MAC_COCOA_EPOCH, tz=timezone.utc)
+            dt = datetime.fromtimestamp(
+                (timestamp_num / 1_000_000) + MAC_COCOA_EPOCH, tz=timezone.utc
+            )
             if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_microseconds', 80))
+                candidates.append((dt, "mac_microseconds", 80))
         except (ValueError, OSError, OverflowError):
             pass
 
@@ -160,39 +165,47 @@ def fetch_content(url: str) -> str:
     """Fetch content from a URL (supports file:// and https://)."""
     parsed = urlparse(url)
 
-    if parsed.scheme == 'file':
+    if parsed.scheme == "file":
         file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+        timeout = int(os.environ.get("TIMEOUT", "60"))
+        user_agent = os.environ.get(
+            "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+        )
 
         import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
+
+        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
         with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
+            return response.read().decode("utf-8", errors="replace")
 
 
 @click.command()
-@click.option('--url', required=True, help='Netscape bookmark file URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
+@click.option("--url", required=True, help="Netscape bookmark file URL to parse")
+@click.option("--snapshot-id", required=False, help="Parent Snapshot UUID")
+@click.option("--crawl-id", required=False, help="Crawl UUID")
+@click.option("--depth", type=int, default=0, help="Current depth level")
+def main(
+    url: str,
+    snapshot_id: str | None = None,
+    crawl_id: str | None = None,
+    depth: int = 0,
+):
     """Parse Netscape bookmark HTML and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
+    env_depth = os.environ.get("SNAPSHOT_DEPTH")
     if env_depth is not None:
         try:
             depth = int(env_depth)
         except Exception:
             pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
+    crawl_id = crawl_id or os.environ.get("CRAWL_ID")
 
     try:
         content = fetch_content(url)
     except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
+        click.echo(f"Failed to fetch {url}: {e}", err=True)
         sys.exit(1)
 
     urls_found = []
@@ -203,25 +216,25 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
         if match:
             bookmark_url = match.group(1)
             timestamp_str = match.group(2)
-            tags_str = match.group(3) or ''
+            tags_str = match.group(3) or ""
             title = match.group(4).strip()
 
             entry = {
-                'type': 'Snapshot',
-                'url': unescape(bookmark_url),
-                'plugin': PLUGIN_NAME,
-                'depth': depth + 1,
+                "type": "Snapshot",
+                "url": unescape(bookmark_url),
+                "plugin": PLUGIN_NAME,
+                "depth": depth + 1,
             }
             if snapshot_id:
-                entry['parent_snapshot_id'] = snapshot_id
+                entry["parent_snapshot_id"] = snapshot_id
             if crawl_id:
-                entry['crawl_id'] = crawl_id
+                entry["crawl_id"] = crawl_id
             if title:
-                entry['title'] = unescape(title)
+                entry["title"] = unescape(title)
             if tags_str:
-                entry['tags'] = tags_str
+                entry["tags"] = tags_str
                 # Collect unique tags
-                for tag in tags_str.split(','):
+                for tag in tags_str.split(","):
                     tag = tag.strip()
                     if tag:
                         all_tags.add(tag)
@@ -230,31 +243,37 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
             if timestamp_str:
                 dt = parse_timestamp(timestamp_str)
                 if dt:
-                    entry['bookmarked_at'] = dt.isoformat()
+                    entry["bookmarked_at"] = dt.isoformat()
 
             urls_found.append(entry)
 
     # Emit Tag records first (to stdout as JSONL)
     for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
+        print(
+            json.dumps(
+                {
+                    "type": "Tag",
+                    "name": tag_name,
+                }
+            )
+        )
 
     # Emit Snapshot records (to stdout as JSONL)
     for entry in urls_found:
         print(json.dumps(entry))
 
     # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
+    URLS_FILE.write_text(
+        "\n".join(json.dumps(r) for r in urls_found) + ("\n" if urls_found else "")
+    )
 
     # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
+    status = "succeeded" if urls_found else "skipped"
     output_str = URLS_FILE.name
     ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output_str,
     }
     print(json.dumps(ar_record))
 
@@ -262,5 +281,5 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py b/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py
index 43754b5..db5371a 100644
--- a/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py
+++ b/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py
@@ -9,7 +9,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_netscape_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_netscape_urls.*"), None)
 
 
 class TestParseNetscapeUrls:
@@ -17,8 +17,8 @@ class TestParseNetscapeUrls:
 
     def test_extracts_urls_from_netscape_bookmarks(self, tmp_path):
         """Test extracting URLs from Netscape bookmark HTML format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
 <TITLE>Bookmarks</TITLE>
 <H1>Bookmarks</H1>
@@ -27,42 +27,46 @@ def test_extracts_urls_from_netscape_bookmarks(self, tmp_path):
     <DT><A HREF="https://foo.bar/page" ADD_DATE="1609545600">Foo Bar</A>
     <DT><A HREF="https://test.org" ADD_DATE="1609632000">Test Org</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
+        assert "urls.jsonl" in result.stderr or "urls.jsonl" in result.stdout
 
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 3
 
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
+        urls = {e["url"] for e in entries}
+        titles = {e.get("title") for e in entries}
 
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'https://test.org' in urls
-        assert 'Example Site' in titles
-        assert 'Foo Bar' in titles
-        assert 'Test Org' in titles
+        assert "https://example.com" in urls
+        assert "https://foo.bar/page" in urls
+        assert "https://test.org" in urls
+        assert "Example Site" in titles
+        assert "Foo Bar" in titles
+        assert "Test Org" in titles
 
     def test_parses_add_date_timestamps(self, tmp_path):
         """Test that ADD_DATE timestamps are parsed correctly."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1609459200">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -70,20 +74,24 @@ def test_parses_add_date_timestamps(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
         # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
+        assert "bookmarked_at" in entry
 
     def test_handles_query_params_in_urls(self, tmp_path):
         """Test that URLs with query parameters are preserved."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com/search?q=test+query&page=1" ADD_DATE="1609459200">Search</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -91,20 +99,24 @@ def test_handles_query_params_in_urls(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert 'q=test+query' in entry['url']
-        assert 'page=1' in entry['url']
+        assert "q=test+query" in entry["url"]
+        assert "page=1" in entry["url"]
 
     def test_handles_html_entities(self, tmp_path):
         """Test that HTML entities in URLs and titles are decoded."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com/page?a=1&amp;b=2" ADD_DATE="1609459200">Test &amp; Title</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -112,48 +124,57 @@ def test_handles_html_entities(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-        assert entry['title'] == 'Test & Title'
+        assert entry["url"] == "https://example.com/page?a=1&b=2"
+        assert entry["title"] == "Test & Title"
 
     def test_skips_when_no_bookmarks_found(self, tmp_path):
         """Test that script returns skipped status when no bookmarks found."""
-        input_file = tmp_path / 'empty.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "empty.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <TITLE>Bookmarks</TITLE>
 <H1>Bookmarks</H1>
 <DL><p>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
         assert '"status": "skipped"' in result.stdout
 
     def test_exits_1_when_file_not_found(self, tmp_path):
         """Test that script exits with code 1 when file doesn't exist."""
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/bookmarks.html'],
+            [
+                sys.executable,
+                str(SCRIPT_PATH),
+                "--url",
+                "file:///nonexistent/bookmarks.html",
+            ],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
+        assert "Failed to fetch" in result.stderr
 
     def test_handles_nested_folders(self, tmp_path):
         """Test parsing bookmarks in nested folder structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><H3>Folder 1</H3>
     <DL><p>
@@ -165,10 +186,10 @@ def test_handles_nested_folders(self, tmp_path):
     </DL><p>
     <DT><A HREF="https://example.com/top" ADD_DATE="1609459200">Top Level</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -176,22 +197,26 @@ def test_handles_nested_folders(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+        urls = {json.loads(line)["url"] for line in lines}
 
-        assert 'https://example.com/nested1' in urls
-        assert 'https://example.com/nested2' in urls
-        assert 'https://example.com/top' in urls
+        assert "https://example.com/nested1" in urls
+        assert "https://example.com/nested2" in urls
+        assert "https://example.com/top" in urls
 
     def test_case_insensitive_parsing(self, tmp_path):
         """Test that parsing is case-insensitive for HTML tags."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <dt><a HREF="https://example.com" ADD_DATE="1609459200">Test</a>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -199,10 +224,14 @@ def test_case_insensitive_parsing(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
+        assert entry["url"] == "https://example.com"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py b/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py
index 402b823..14dbe6d 100644
--- a/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py
+++ b/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py
@@ -10,7 +10,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_netscape_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_netscape_urls.*"), None)
 
 
 class TestFirefoxFormat:
@@ -18,8 +18,8 @@ class TestFirefoxFormat:
 
     def test_firefox_basic_format(self, tmp_path):
         """Test standard Firefox export format with Unix timestamps in seconds."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <!-- This is an automatically generated file.
      It will be read and overwritten.
      DO NOT EDIT! -->
@@ -30,10 +30,10 @@ def test_firefox_basic_format(self, tmp_path):
     <DT><A HREF="https://example.com" ADD_DATE="1609459200" LAST_MODIFIED="1609545600">Example Site</A>
     <DT><A HREF="https://mozilla.org" ADD_DATE="1640995200">Mozilla</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -41,29 +41,33 @@ def test_firefox_basic_format(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         assert len(entries) == 2
-        assert entries[0]['url'] == 'https://example.com'
-        assert entries[0]['title'] == 'Example Site'
+        assert entries[0]["url"] == "https://example.com"
+        assert entries[0]["title"] == "Example Site"
         # Timestamp should be parsed as seconds (Jan 1, 2021)
-        assert '2021-01-01' in entries[0]['bookmarked_at']
+        assert "2021-01-01" in entries[0]["bookmarked_at"]
         # Second bookmark (Jan 1, 2022)
-        assert '2022-01-01' in entries[1]['bookmarked_at']
+        assert "2022-01-01" in entries[1]["bookmarked_at"]
 
     def test_firefox_with_tags(self, tmp_path):
         """Test Firefox bookmarks with tags."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><A HREF="https://example.com" ADD_DATE="1609459200" TAGS="coding,tutorial,python">Python Tutorial</A>
     <DT><A HREF="https://rust-lang.org" ADD_DATE="1609459200" TAGS="coding,rust">Rust Lang</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -71,26 +75,30 @@ def test_firefox_with_tags(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL) - get all JSONL records
-        all_lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.startswith('{')]
+        all_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and line.startswith("{")
+        ]
         records = [json.loads(line) for line in all_lines]
 
         # Should have Tag records + Snapshot records
-        tags = [r for r in records if r.get('type') == 'Tag']
-        snapshots = [r for r in records if r.get('type') == 'Snapshot']
+        tags = [r for r in records if r.get("type") == "Tag"]
+        snapshots = [r for r in records if r.get("type") == "Snapshot"]
 
-        tag_names = {t['name'] for t in tags}
-        assert 'coding' in tag_names
-        assert 'tutorial' in tag_names
-        assert 'python' in tag_names
-        assert 'rust' in tag_names
+        tag_names = {t["name"] for t in tags}
+        assert "coding" in tag_names
+        assert "tutorial" in tag_names
+        assert "python" in tag_names
+        assert "rust" in tag_names
 
-        assert snapshots[0]['tags'] == 'coding,tutorial,python'
-        assert snapshots[1]['tags'] == 'coding,rust'
+        assert snapshots[0]["tags"] == "coding,tutorial,python"
+        assert snapshots[1]["tags"] == "coding,rust"
 
     def test_firefox_nested_folders(self, tmp_path):
         """Test Firefox bookmark folders and nested structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609545600">Toolbar</H3>
     <DL><p>
@@ -103,10 +111,10 @@ def test_firefox_nested_folders(self, tmp_path):
     </DL><p>
     <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1609459200">Hacker News</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -114,28 +122,32 @@ def test_firefox_nested_folders(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
+        urls = {e["url"] for e in entries}
 
-        assert 'https://github.com' in urls
-        assert 'https://stackoverflow.com' in urls
-        assert 'https://developer.mozilla.org' in urls
-        assert 'https://news.ycombinator.com' in urls
+        assert "https://github.com" in urls
+        assert "https://stackoverflow.com" in urls
+        assert "https://developer.mozilla.org" in urls
+        assert "https://news.ycombinator.com" in urls
         assert len(entries) == 4
 
     def test_firefox_icon_and_icon_uri(self, tmp_path):
         """Test Firefox bookmarks with ICON and ICON_URI attributes."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><A HREF="https://example.com" ADD_DATE="1609459200" ICON="data:image/png;base64,iVBORw0K">Example</A>
     <DT><A HREF="https://github.com" ADD_DATE="1609459200" ICON_URI="https://github.com/favicon.ico">GitHub</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -143,11 +155,15 @@ def test_firefox_icon_and_icon_uri(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
-        assert entries[0]['url'] == 'https://example.com'
-        assert entries[1]['url'] == 'https://github.com'
+        assert entries[0]["url"] == "https://example.com"
+        assert entries[1]["url"] == "https://github.com"
 
 
 class TestChromeFormat:
@@ -155,10 +171,10 @@ class TestChromeFormat:
 
     def test_chrome_microsecond_timestamps(self, tmp_path):
         """Test Chrome format with microsecond timestamps (16-17 digits)."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # Chrome uses WebKit/Chrome timestamps which are microseconds
         # 1609459200000000 = Jan 1, 2021 00:00:00 in microseconds
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
 <TITLE>Bookmarks</TITLE>
 <H1>Bookmarks</H1>
@@ -166,10 +182,10 @@ def test_chrome_microsecond_timestamps(self, tmp_path):
     <DT><A HREF="https://google.com" ADD_DATE="1609459200000000">Google</A>
     <DT><A HREF="https://chrome.google.com" ADD_DATE="1640995200000000">Chrome</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -177,22 +193,26 @@ def test_chrome_microsecond_timestamps(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         # Should correctly parse microsecond timestamps
         # Currently will fail - we'll fix the parser after writing tests
-        assert entries[0]['url'] == 'https://google.com'
+        assert entries[0]["url"] == "https://google.com"
         # Timestamp should be around Jan 1, 2021, not year 52970!
-        if 'bookmarked_at' in entries[0]:
-            year = datetime.fromisoformat(entries[0]['bookmarked_at']).year
+        if "bookmarked_at" in entries[0]:
+            year = datetime.fromisoformat(entries[0]["bookmarked_at"]).year
             # Should be 2021, not some far future date
             assert 2020 <= year <= 2025, f"Year should be ~2021, got {year}"
 
     def test_chrome_with_folders(self, tmp_path):
         """Test Chrome bookmark folder structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609459200" PERSONAL_TOOLBAR_FOLDER="true">Bookmarks bar</H3>
     <DL><p>
@@ -203,10 +223,10 @@ def test_chrome_with_folders(self, tmp_path):
         <DT><A HREF="https://example.com" ADD_DATE="1609459200">Example</A>
     </DL><p>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -214,12 +234,16 @@ def test_chrome_with_folders(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
+        urls = {e["url"] for e in entries}
 
-        assert 'https://google.com' in urls
-        assert 'https://example.com' in urls
+        assert "https://google.com" in urls
+        assert "https://example.com" in urls
 
 
 class TestSafariFormat:
@@ -227,8 +251,8 @@ class TestSafariFormat:
 
     def test_safari_basic_format(self, tmp_path):
         """Test Safari export format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
 <Title>Bookmarks</Title>
 <H1>Bookmarks</H1>
@@ -239,10 +263,10 @@ def test_safari_basic_format(self, tmp_path):
         <DT><A HREF="https://webkit.org" ADD_DATE="1609459200">WebKit</A>
     </DL><p>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -250,17 +274,21 @@ def test_safari_basic_format(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
+        urls = {e["url"] for e in entries}
 
-        assert 'https://apple.com' in urls
-        assert 'https://webkit.org' in urls
+        assert "https://apple.com" in urls
+        assert "https://webkit.org" in urls
 
     def test_safari_reading_list(self, tmp_path):
         """Test Safari Reading List entries."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <DL><p>
     <DT><H3 FOLDED ADD_DATE="1609459200">com.apple.ReadingList</H3>
     <DL><p>
@@ -270,10 +298,10 @@ def test_safari_reading_list(self, tmp_path):
         <DD>Another saved article
     </DL><p>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -281,12 +309,16 @@ def test_safari_reading_list(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
+        urls = {e["url"] for e in entries}
 
-        assert 'https://article1.com' in urls
-        assert 'https://article2.com' in urls
+        assert "https://article1.com" in urls
+        assert "https://article2.com" in urls
 
 
 class TestEdgeFormat:
@@ -294,8 +326,8 @@ class TestEdgeFormat:
 
     def test_edge_chromium_format(self, tmp_path):
         """Test Edge (Chromium-based) format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""<!DOCTYPE NETSCAPE-Bookmark-file-1>
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
 <TITLE>Bookmarks</TITLE>
 <H1>Bookmarks</H1>
@@ -303,10 +335,10 @@ def test_edge_chromium_format(self, tmp_path):
     <DT><A HREF="https://microsoft.com" ADD_DATE="1609459200">Microsoft</A>
     <DT><A HREF="https://bing.com" ADD_DATE="1609459200">Bing</A>
 </DL><p>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -314,12 +346,16 @@ def test_edge_chromium_format(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
+        urls = {e["url"] for e in entries}
 
-        assert 'https://microsoft.com' in urls
-        assert 'https://bing.com' in urls
+        assert "https://microsoft.com" in urls
+        assert "https://bing.com" in urls
 
 
 class TestTimestampFormats:
@@ -327,14 +363,14 @@ class TestTimestampFormats:
 
     def test_unix_seconds_timestamp(self, tmp_path):
         """Test Unix epoch timestamp in seconds (10-11 digits) - Firefox, Chrome HTML export."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 1609459200 = Jan 1, 2021 00:00:00 UTC (Unix epoch)
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1609459200">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -342,26 +378,30 @@ def test_unix_seconds_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         assert dt.year == 2021
         assert dt.month == 1
         assert dt.day == 1
 
     def test_mac_cocoa_seconds_timestamp(self, tmp_path):
         """Test Mac/Cocoa epoch timestamp in seconds - Safari uses epoch of 2001-01-01."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # Safari uses Mac absolute time: seconds since 2001-01-01 00:00:00 UTC
         # 631152000 seconds after 2001-01-01 = Jan 1, 2021
         # 631152000 as Unix would be Feb 1990 (too old for a recent bookmark)
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://apple.com" ADD_DATE="631152000">Safari Bookmark</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -369,23 +409,27 @@ def test_mac_cocoa_seconds_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         # Should detect Mac epoch and convert correctly to 2021
         assert 2020 <= dt.year <= 2022, f"Expected ~2021, got {dt.year}"
 
     def test_safari_recent_timestamp(self, tmp_path):
         """Test recent Safari timestamp (Mac epoch)."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 725846400 seconds after 2001-01-01 = Jan 1, 2024
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://webkit.org" ADD_DATE="725846400">Recent Safari</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -393,23 +437,27 @@ def test_safari_recent_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         # Should detect Mac epoch and convert to 2024
         assert 2023 <= dt.year <= 2025, f"Expected ~2024, got {dt.year}"
 
     def test_unix_milliseconds_timestamp(self, tmp_path):
         """Test Unix epoch timestamp in milliseconds (13 digits) - Some JavaScript exports."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 1609459200000 = Jan 1, 2021 00:00:00 UTC in milliseconds
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1609459200000">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -417,25 +465,29 @@ def test_unix_milliseconds_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         assert dt.year == 2021
         assert dt.month == 1
         assert dt.day == 1
 
     def test_chrome_webkit_microseconds_timestamp(self, tmp_path):
         """Test Chrome WebKit timestamp in microseconds (16-17 digits) - Chrome internal format."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 1609459200000000 = Jan 1, 2021 00:00:00 UTC in microseconds (Unix epoch)
         # Chrome sometimes exports with microsecond precision
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1609459200000000">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -443,24 +495,28 @@ def test_chrome_webkit_microseconds_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         assert dt.year == 2021
         assert dt.month == 1
         assert dt.day == 1
 
     def test_mac_cocoa_milliseconds_timestamp(self, tmp_path):
         """Test Mac/Cocoa epoch in milliseconds (rare but possible)."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 631152000000 milliseconds after 2001-01-01 = Jan 1, 2021
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://apple.com" ADD_DATE="631152000000">Safari Milliseconds</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -468,26 +524,30 @@ def test_mac_cocoa_milliseconds_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         # Should detect Mac epoch with milliseconds and convert to 2021
         assert 2020 <= dt.year <= 2022, f"Expected ~2021, got {dt.year}"
 
     def test_ambiguous_timestamp_detection(self, tmp_path):
         """Test that ambiguous timestamps are resolved to reasonable dates."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # Test multiple bookmarks with different timestamp formats mixed together
         # Parser should handle each correctly
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://unix-seconds.com" ADD_DATE="1609459200">Unix Seconds 2021</A>
 <DT><A HREF="https://mac-seconds.com" ADD_DATE="631152000">Mac Seconds 2021</A>
 <DT><A HREF="https://unix-ms.com" ADD_DATE="1704067200000">Unix MS 2024</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -495,24 +555,30 @@ def test_ambiguous_timestamp_detection(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         # All should be parsed to reasonable dates (2020-2025)
         for entry in entries:
-            dt = datetime.fromisoformat(entry['bookmarked_at'])
-            assert 2020 <= dt.year <= 2025, f"Date {dt.year} out of reasonable range for {entry['url']}"
+            dt = datetime.fromisoformat(entry["bookmarked_at"])
+            assert 2020 <= dt.year <= 2025, (
+                f"Date {dt.year} out of reasonable range for {entry['url']}"
+            )
 
     def test_very_old_timestamp(self, tmp_path):
         """Test very old timestamp (1990s)."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 820454400 = Jan 1, 1996
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="820454400">Old Bookmark</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -520,22 +586,26 @@ def test_very_old_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         assert dt.year == 1996
 
     def test_recent_timestamp(self, tmp_path):
         """Test recent timestamp (2024)."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # 1704067200 = Jan 1, 2024
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1704067200">Recent</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -543,21 +613,25 @@ def test_recent_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
+        dt = datetime.fromisoformat(entry["bookmarked_at"])
         assert dt.year == 2024
 
     def test_invalid_timestamp(self, tmp_path):
         """Test invalid/malformed timestamp - should extract URL but skip timestamp."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="invalid">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -565,22 +639,26 @@ def test_invalid_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
         # Should still extract URL but skip timestamp
-        assert entry['url'] == 'https://example.com'
-        assert 'bookmarked_at' not in entry
+        assert entry["url"] == "https://example.com"
+        assert "bookmarked_at" not in entry
 
     def test_zero_timestamp(self, tmp_path):
         """Test timestamp of 0 (Unix epoch) - too old, should be skipped."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="0">Test</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -588,25 +666,29 @@ def test_zero_timestamp(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
         # Timestamp 0 = 1970, which is before MIN_REASONABLE_YEAR (1995)
         # Parser should skip it as unreasonable
-        assert entry['url'] == 'https://example.com'
+        assert entry["url"] == "https://example.com"
         # Timestamp should be omitted (outside reasonable range)
-        assert 'bookmarked_at' not in entry
+        assert "bookmarked_at" not in entry
 
     def test_negative_timestamp(self, tmp_path):
         """Test negative timestamp (before Unix epoch) - should handle gracefully."""
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         # -86400 = 1 day before Unix epoch = Dec 31, 1969
-        input_file.write_text('''
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="-86400">Before Unix Epoch</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -615,12 +697,16 @@ def test_negative_timestamp(self, tmp_path):
         # Should handle gracefully (extracts URL, may or may not include timestamp)
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
+        assert entry["url"] == "https://example.com"
         # If timestamp is included, should be reasonable (1969)
-        if 'bookmarked_at' in entry:
-            dt = datetime.fromisoformat(entry['bookmarked_at'])
+        if "bookmarked_at" in entry:
+            dt = datetime.fromisoformat(entry["bookmarked_at"])
             # Should be near Unix epoch (late 1969)
             assert 1969 <= dt.year <= 1970
 
@@ -630,14 +716,14 @@ class TestBookmarkAttributes:
 
     def test_private_attribute(self, tmp_path):
         """Test bookmarks with PRIVATE attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://private.example.com" ADD_DATE="1609459200" PRIVATE="1">Private</A>
 <DT><A HREF="https://public.example.com" ADD_DATE="1609459200">Public</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -645,7 +731,11 @@ def test_private_attribute(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         # Both should be extracted
@@ -653,13 +743,13 @@ def test_private_attribute(self, tmp_path):
 
     def test_shortcuturl_attribute(self, tmp_path):
         """Test bookmarks with SHORTCUTURL keyword attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://google.com/search?q=%s" ADD_DATE="1609459200" SHORTCUTURL="g">Google Search</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -667,20 +757,24 @@ def test_shortcuturl_attribute(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert 'google.com' in entry['url']
+        assert "google.com" in entry["url"]
 
     def test_post_data_attribute(self, tmp_path):
         """Test bookmarks with POST_DATA attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com/login" ADD_DATE="1609459200" POST_DATA="user=test">Login</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -688,10 +782,14 @@ def test_post_data_attribute(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'] == 'https://example.com/login'
+        assert entry["url"] == "https://example.com/login"
 
 
 class TestEdgeCases:
@@ -699,17 +797,17 @@ class TestEdgeCases:
 
     def test_multiline_bookmark(self, tmp_path):
         """Test bookmark spanning multiple lines."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com"
        ADD_DATE="1609459200"
        TAGS="tag1,tag2">
     Multi-line Bookmark
 </A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -721,20 +819,24 @@ def test_multiline_bookmark(self, tmp_path):
             # Output goes to stdout (JSONL)
             content = result.stdout.strip()
             if content:
-                lines = [line for line in content.split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+                lines = [
+                    line
+                    for line in content.split("\n")
+                    if line.strip() and '"type": "Snapshot"' in line
+                ]
                 if lines:
                     entry = json.loads(lines[0])
-                    assert 'example.com' in entry['url']
+                    assert "example.com" in entry["url"]
 
     def test_missing_add_date(self, tmp_path):
         """Test bookmark without ADD_DATE attribute - should still extract URL."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com">No Date</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -743,21 +845,25 @@ def test_missing_add_date(self, tmp_path):
         # Should succeed and extract URL without timestamp
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['title'] == 'No Date'
-        assert 'bookmarked_at' not in entry
+        assert entry["url"] == "https://example.com"
+        assert entry["title"] == "No Date"
+        assert "bookmarked_at" not in entry
 
     def test_empty_title(self, tmp_path):
         """Test bookmark with empty title."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com" ADD_DATE="1609459200"></A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -767,20 +873,20 @@ def test_empty_title(self, tmp_path):
         # Parser emits skipped ArchiveResult when no valid bookmarks found
         assert result.returncode == 0
         result_json = json.loads(result.stdout.strip())
-        assert result_json['type'] == 'ArchiveResult'
-        assert result_json['status'] == 'skipped'
+        assert result_json["type"] == "ArchiveResult"
+        assert result_json["status"] == "skipped"
 
     def test_special_chars_in_url(self, tmp_path):
         """Test URLs with special characters."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="https://example.com/path?q=test&foo=bar&baz=qux#section" ADD_DATE="1609459200">Special URL</A>
 <DT><A HREF="https://example.com/path%20with%20spaces" ADD_DATE="1609459200">Encoded Spaces</A>
 <DT><A HREF="https://example.com/unicode/日本語" ADD_DATE="1609459200">Unicode Path</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -788,23 +894,27 @@ def test_special_chars_in_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         assert len(entries) == 3
-        assert 'q=test&foo=bar' in entries[0]['url']
-        assert '%20' in entries[1]['url']
+        assert "q=test&foo=bar" in entries[0]["url"]
+        assert "%20" in entries[1]["url"]
 
     def test_javascript_url(self, tmp_path):
         """Test javascript: URLs (should still be extracted)."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="javascript:alert('test')" ADD_DATE="1609459200">JS Bookmarklet</A>
 <DT><A HREF="https://example.com" ADD_DATE="1609459200">Normal</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -812,22 +922,26 @@ def test_javascript_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         # Both should be extracted
         assert len(entries) == 2
-        assert entries[0]['url'].startswith('javascript:')
+        assert entries[0]["url"].startswith("javascript:")
 
     def test_data_url(self, tmp_path):
         """Test data: URLs."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="data:text/html,<h1>Test</h1>" ADD_DATE="1609459200">Data URL</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -835,20 +949,24 @@ def test_data_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'].startswith('data:')
+        assert entry["url"].startswith("data:")
 
     def test_file_url(self, tmp_path):
         """Test file:// URLs."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text("""
 <DT><A HREF="file:///home/user/document.pdf" ADD_DATE="1609459200">Local File</A>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -856,21 +974,27 @@ def test_file_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'].startswith('file://')
+        assert entry["url"].startswith("file://")
 
     def test_very_long_url(self, tmp_path):
         """Test very long URLs (2000+ characters)."""
-        long_url = 'https://example.com/path?' + '&'.join([f'param{i}=value{i}' for i in range(100)])
-        input_file = tmp_path / 'bookmarks.html'
+        long_url = "https://example.com/path?" + "&".join(
+            [f"param{i}=value{i}" for i in range(100)]
+        )
+        input_file = tmp_path / "bookmarks.html"
         input_file.write_text(f'''
 <DT><A HREF="{long_url}" ADD_DATE="1609459200">Long URL</A>
         ''')
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -878,25 +1002,32 @@ def test_very_long_url(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert len(entry['url']) > 1000
-        assert entry['url'].startswith('https://example.com')
+        assert len(entry["url"]) > 1000
+        assert entry["url"].startswith("https://example.com")
 
     def test_unicode_in_title(self, tmp_path):
         """Test Unicode characters in titles."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
+        input_file = tmp_path / "bookmarks.html"
+        input_file.write_text(
+            """
 <DT><A HREF="https://example.com" ADD_DATE="1609459200">日本語のタイトル</A>
 <DT><A HREF="https://example.org" ADD_DATE="1609459200">Título en Español</A>
 <DT><A HREF="https://example.net" ADD_DATE="1609459200">Заголовок на русском</A>
 <DT><A HREF="https://example.biz" ADD_DATE="1609459200">عنوان بالعربية</A>
 <DT><A HREF="https://example.info" ADD_DATE="1609459200">Emoji 🚀 📚 🎉</A>
-        ''', encoding='utf-8')
+        """,
+            encoding="utf-8",
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -904,12 +1035,16 @@ def test_unicode_in_title(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entries = [json.loads(line) for line in lines]
 
         assert len(entries) == 5
-        assert any('日本語' in e.get('title', '') for e in entries)
-        assert any('Español' in e.get('title', '') for e in entries)
+        assert any("日本語" in e.get("title", "") for e in entries)
+        assert any("Español" in e.get("title", "") for e in entries)
 
     def test_large_file_many_bookmarks(self, tmp_path):
         """Test parsing large file with many bookmarks (1000+)."""
@@ -919,15 +1054,15 @@ def test_large_file_many_bookmarks(self, tmp_path):
                 f'<DT><A HREF="https://example.com/page{i}" ADD_DATE="1609459200" TAGS="tag{i % 10}">Bookmark {i}</A>'
             )
 
-        input_file = tmp_path / 'bookmarks.html'
+        input_file = tmp_path / "bookmarks.html"
         input_file.write_text(
-            '<!DOCTYPE NETSCAPE-Bookmark-file-1>\n<DL><p>\n' +
-            '\n'.join(bookmarks) +
-            '\n</DL><p>'
+            "<!DOCTYPE NETSCAPE-Bookmark-file-1>\n<DL><p>\n"
+            + "\n".join(bookmarks)
+            + "\n</DL><p>"
         )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -935,19 +1070,23 @@ def test_large_file_many_bookmarks(self, tmp_path):
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
+        assert "urls.jsonl" in result.stderr or "urls.jsonl" in result.stdout
 
         # Output goes to stdout (JSONL) - get all JSONL records
-        all_lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.startswith('{')]
+        all_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and line.startswith("{")
+        ]
         records = [json.loads(line) for line in all_lines]
 
         # Should have 10 unique tags + 1000 snapshots
-        tags = [r for r in records if r.get('type') == 'Tag']
-        snapshots = [r for r in records if r.get('type') == 'Snapshot']
+        tags = [r for r in records if r.get("type") == "Tag"]
+        snapshots = [r for r in records if r.get("type") == "Snapshot"]
 
         assert len(tags) == 10
         assert len(snapshots) == 1000
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py b/abx_plugins/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py
index c0bf462..587640c 100755
--- a/abx_plugins/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py
+++ b/abx_plugins/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py
@@ -23,25 +23,28 @@
 import json
 import os
 import sys
+from importlib import import_module
 from pathlib import Path
 from datetime import datetime, timezone
 from html import unescape
 from time import mktime
+from typing import Any
 from urllib.parse import urlparse
 
 import rich_click as click
 
-PLUGIN_NAME = 'parse_rss_urls'
+PLUGIN_NAME = "parse_rss_urls"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-URLS_FILE = Path('urls.jsonl')
+URLS_FILE = Path("urls.jsonl")
 
+feedparser: Any | None
 try:
-    import feedparser
-except ImportError:
+    feedparser = import_module("feedparser")
+except ModuleNotFoundError:
     feedparser = None
 
 
@@ -49,43 +52,51 @@ def fetch_content(url: str) -> str:
     """Fetch content from a URL (supports file:// and https://)."""
     parsed = urlparse(url)
 
-    if parsed.scheme == 'file':
+    if parsed.scheme == "file":
         file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+        timeout = int(os.environ.get("TIMEOUT", "60"))
+        user_agent = os.environ.get(
+            "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+        )
 
         import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
+
+        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
         with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
+            return response.read().decode("utf-8", errors="replace")
 
 
 @click.command()
-@click.option('--url', required=True, help='RSS/Atom feed URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
+@click.option("--url", required=True, help="RSS/Atom feed URL to parse")
+@click.option("--snapshot-id", required=False, help="Parent Snapshot UUID")
+@click.option("--crawl-id", required=False, help="Crawl UUID")
+@click.option("--depth", type=int, default=0, help="Current depth level")
+def main(
+    url: str,
+    snapshot_id: str | None = None,
+    crawl_id: str | None = None,
+    depth: int = 0,
+):
     """Parse RSS/Atom feed and extract article URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
+    env_depth = os.environ.get("SNAPSHOT_DEPTH")
     if env_depth is not None:
         try:
             depth = int(env_depth)
         except Exception:
             pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
+    crawl_id = crawl_id or os.environ.get("CRAWL_ID")
 
     if feedparser is None:
-        click.echo('feedparser library not installed', err=True)
+        click.echo("feedparser library not installed", err=True)
         sys.exit(1)
 
     try:
         content = fetch_content(url)
     except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
+        click.echo(f"Failed to fetch {url}: {e}", err=True)
         sys.exit(1)
 
     # Parse the feed
@@ -99,26 +110,32 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
         pass
     else:
         for item in feed.entries:
-            item_url = getattr(item, 'link', None)
+            item_url = getattr(item, "link", None)
             if not item_url:
                 continue
 
-            title = getattr(item, 'title', None)
+            title = getattr(item, "title", None)
 
             # Get bookmarked_at (published/updated date as ISO 8601)
             bookmarked_at = None
-            if hasattr(item, 'published_parsed') and item.published_parsed:
-                bookmarked_at = datetime.fromtimestamp(mktime(item.published_parsed), tz=timezone.utc).isoformat()
-            elif hasattr(item, 'updated_parsed') and item.updated_parsed:
-                bookmarked_at = datetime.fromtimestamp(mktime(item.updated_parsed), tz=timezone.utc).isoformat()
+            if hasattr(item, "published_parsed") and item.published_parsed:
+                bookmarked_at = datetime.fromtimestamp(
+                    mktime(item.published_parsed), tz=timezone.utc
+                ).isoformat()
+            elif hasattr(item, "updated_parsed") and item.updated_parsed:
+                bookmarked_at = datetime.fromtimestamp(
+                    mktime(item.updated_parsed), tz=timezone.utc
+                ).isoformat()
 
             # Get tags
-            tags = ''
-            if hasattr(item, 'tags') and item.tags:
+            tags = ""
+            if hasattr(item, "tags") and item.tags:
                 try:
-                    tags = ','.join(tag.term for tag in item.tags if hasattr(tag, 'term'))
+                    tags = ",".join(
+                        tag.term for tag in item.tags if hasattr(tag, "term")
+                    )
                     # Collect unique tags
-                    for tag in tags.split(','):
+                    for tag in tags.split(","):
                         tag = tag.strip()
                         if tag:
                             all_tags.add(tag)
@@ -126,44 +143,50 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
                     pass
 
             entry = {
-                'type': 'Snapshot',
-                'url': unescape(item_url),
-                'plugin': PLUGIN_NAME,
-                'depth': depth + 1,
+                "type": "Snapshot",
+                "url": unescape(item_url),
+                "plugin": PLUGIN_NAME,
+                "depth": depth + 1,
             }
             if snapshot_id:
-                entry['parent_snapshot_id'] = snapshot_id
+                entry["parent_snapshot_id"] = snapshot_id
             if crawl_id:
-                entry['crawl_id'] = crawl_id
+                entry["crawl_id"] = crawl_id
             if title:
-                entry['title'] = unescape(title)
+                entry["title"] = unescape(title)
             if bookmarked_at:
-                entry['bookmarked_at'] = bookmarked_at
+                entry["bookmarked_at"] = bookmarked_at
             if tags:
-                entry['tags'] = tags
+                entry["tags"] = tags
             urls_found.append(entry)
 
     # Emit Tag records first (to stdout as JSONL)
     for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
+        print(
+            json.dumps(
+                {
+                    "type": "Tag",
+                    "name": tag_name,
+                }
+            )
+        )
 
     # Emit Snapshot records (to stdout as JSONL)
     for entry in urls_found:
         print(json.dumps(entry))
 
     # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
+    URLS_FILE.write_text(
+        "\n".join(json.dumps(r) for r in urls_found) + ("\n" if urls_found else "")
+    )
 
     # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
+    status = "succeeded" if urls_found else "skipped"
     output_str = URLS_FILE.name
     ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output_str,
     }
     print(json.dumps(ar_record))
 
@@ -171,5 +194,5 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls.py b/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
index 3cd54f6..3b256f1 100644
--- a/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
+++ b/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
@@ -9,7 +9,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_rss_urls.*"), None)
 
 
 class TestParseRssUrls:
@@ -19,11 +19,16 @@ def test_parses_real_rss_feed(self, tmp_path):
         """Test parsing a real RSS feed from the web."""
         # Use httpbin.org which provides a sample RSS feed
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'https://news.ycombinator.com/rss'],
+            [
+                sys.executable,
+                str(SCRIPT_PATH),
+                "--url",
+                "https://news.ycombinator.com/rss",
+            ],
             cwd=tmp_path,
             capture_output=True,
             text=True,
-            timeout=30
+            timeout=30,
         )
 
         # HN RSS feed should parse successfully
@@ -33,13 +38,13 @@ def test_parses_real_rss_feed(self, tmp_path):
             assert len(content) > 0, "No URLs extracted from real RSS feed"
 
             # Verify at least one URL was extracted
-            lines = content.strip().split('\n')
+            lines = content.strip().split("\n")
             assert len(lines) > 0, "No entries found in RSS feed"
 
     def test_extracts_urls_from_rss_feed(self, tmp_path):
         """Test extracting URLs from an RSS 2.0 feed."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <rss version="2.0">
   <channel>
     <title>Test Feed</title>
@@ -56,35 +61,39 @@ def test_extracts_urls_from_rss_feed(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
+        assert "urls.jsonl" in result.stderr or "urls.jsonl" in result.stdout
 
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
         entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
+        urls = {e["url"] for e in entries}
+        titles = {e.get("title") for e in entries}
 
-        assert 'https://example.com/post/1' in urls
-        assert 'https://example.com/post/2' in urls
-        assert 'First Post' in titles
-        assert 'Second Post' in titles
+        assert "https://example.com/post/1" in urls
+        assert "https://example.com/post/2" in urls
+        assert "First Post" in titles
+        assert "Second Post" in titles
 
     def test_extracts_urls_from_atom_feed(self, tmp_path):
         """Test extracting URLs from an Atom feed."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <title>Test Atom Feed</title>
   <entry>
@@ -98,10 +107,10 @@ def test_extracts_urls_from_atom_feed(self, tmp_path):
     <updated>2024-01-02T12:00:00Z</updated>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -109,50 +118,54 @@ def test_extracts_urls_from_atom_feed(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+        urls = {json.loads(line)["url"] for line in lines}
 
-        assert 'https://atom.example.com/entry/1' in urls
-        assert 'https://atom.example.com/entry/2' in urls
+        assert "https://atom.example.com/entry/1" in urls
+        assert "https://atom.example.com/entry/2" in urls
 
     def test_skips_when_no_entries(self, tmp_path):
         """Test that script returns skipped status when feed has no entries."""
-        input_file = tmp_path / 'empty.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "empty.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <title>Empty Feed</title>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
         assert '"status": "skipped"' in result.stdout
 
     def test_exits_1_when_file_not_found(self, tmp_path):
         """Test that script exits with code 1 when file doesn't exist."""
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/feed.rss'],
+            [sys.executable, str(SCRIPT_PATH), "--url", "file:///nonexistent/feed.rss"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
+        assert "Failed to fetch" in result.stderr
 
     def test_handles_html_entities_in_urls(self, tmp_path):
         """Test that HTML entities in URLs are decoded."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -161,10 +174,10 @@ def test_handles_html_entities_in_urls(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -172,14 +185,18 @@ def test_handles_html_entities_in_urls(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
+        assert entry["url"] == "https://example.com/page?a=1&b=2"
 
     def test_includes_optional_metadata(self, tmp_path):
         """Test that title and timestamp are included when present."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -189,10 +206,10 @@ def test_includes_optional_metadata(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -200,13 +217,17 @@ def test_includes_optional_metadata(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/test'
-        assert entry['title'] == 'Test Title'
+        assert entry["url"] == "https://example.com/test"
+        assert entry["title"] == "Test Title"
         # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
+        assert "bookmarked_at" in entry
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py b/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
index fbc415f..f1c2b34 100644
--- a/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
+++ b/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
@@ -9,7 +9,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_rss_urls.*"), None)
 
 
 class TestRssVariants:
@@ -17,8 +17,8 @@ class TestRssVariants:
 
     def test_rss_091(self, tmp_path):
         """Test RSS 0.91 format (oldest RSS version)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <rss version="0.91">
   <channel>
     <title>RSS 0.91 Feed</title>
@@ -31,10 +31,10 @@ def test_rss_091(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -42,17 +42,21 @@ def test_rss_091(self, tmp_path):
 
         assert result.returncode == 0, f"Failed: {result.stderr}"
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'] == 'https://example.com/article1'
-        assert entry['title'] == 'RSS 0.91 Article'
-        assert entry['plugin'] == 'parse_rss_urls'
+        assert entry["url"] == "https://example.com/article1"
+        assert entry["title"] == "RSS 0.91 Article"
+        assert entry["plugin"] == "parse_rss_urls"
 
     def test_rss_10_rdf(self, tmp_path):
         """Test RSS 1.0 (RDF) format."""
-        input_file = tmp_path / 'feed.rdf'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rdf"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          xmlns="http://purl.org/rss/1.0/"
          xmlns:dc="http://purl.org/dc/elements/1.1/">
@@ -72,10 +76,10 @@ def test_rss_10_rdf(self, tmp_path):
     <dc:date>2024-01-16T14:20:00Z</dc:date>
   </item>
 </rdf:RDF>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -83,18 +87,24 @@ def test_rss_10_rdf(self, tmp_path):
 
         assert result.returncode == 0, f"Failed: {result.stderr}"
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-
-        urls = {e['url'] for e in entries}
-        assert 'https://example.com/rdf1' in urls
-        assert 'https://example.com/rdf2' in urls
-        assert any(e.get('bookmarked_at') for e in entries)
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+        entries = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
+
+        urls = {e["url"] for e in entries}
+        assert "https://example.com/rdf1" in urls
+        assert "https://example.com/rdf2" in urls
+        assert any(e.get("bookmarked_at") for e in entries)
 
     def test_rss_20_with_full_metadata(self, tmp_path):
         """Test RSS 2.0 with all standard metadata fields."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <rss version="2.0">
   <channel>
     <title>Full RSS 2.0</title>
@@ -112,10 +122,10 @@ def test_rss_20_with_full_metadata(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -124,21 +134,26 @@ def test_rss_20_with_full_metadata(self, tmp_path):
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
         content = result.stdout.strip()
-        lines = content.split('\n')
+        lines = content.split("\n")
 
         # Check for Tag records
-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'Technology' in tag_names
-        assert 'Programming' in tag_names
+        tags = [json.loads(line) for line in lines if json.loads(line)["type"] == "Tag"]
+        tag_names = {t["name"] for t in tags}
+        assert "Technology" in tag_names
+        assert "Programming" in tag_names
 
         # Check Snapshot record
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
-        assert entry['url'] == 'https://example.com/complete'
-        assert entry['title'] == 'Complete Article'
-        assert 'bookmarked_at' in entry
-        assert entry['tags'] == 'Technology,Programming' or entry['tags'] == 'Programming,Technology'
+        assert entry["url"] == "https://example.com/complete"
+        assert entry["title"] == "Complete Article"
+        assert "bookmarked_at" in entry
+        assert (
+            entry["tags"] == "Technology,Programming"
+            or entry["tags"] == "Programming,Technology"
+        )
 
 
 class TestAtomVariants:
@@ -146,8 +161,8 @@ class TestAtomVariants:
 
     def test_atom_10_full(self, tmp_path):
         """Test Atom 1.0 with full metadata."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <title>Atom 1.0 Feed</title>
   <updated>2024-01-15T00:00:00Z</updated>
@@ -161,10 +176,10 @@ def test_atom_10_full(self, tmp_path):
     <category term="research"/>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -172,22 +187,28 @@ def test_atom_10_full(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'science' in tag_names
-        assert 'research' in tag_names
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
+
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
+        tag_names = {t["name"] for t in tags}
+        assert "science" in tag_names
+        assert "research" in tag_names
+
+        snapshots = [
+            json.loads(line)
+            for line in lines
+            if json.loads(line).get("type") == "Snapshot"
+        ]
         entry = snapshots[0]
-        assert entry['url'] == 'https://atom.example.com/1'
-        assert 'bookmarked_at' in entry
+        assert entry["url"] == "https://atom.example.com/1"
+        assert "bookmarked_at" in entry
 
     def test_atom_with_alternate_link(self, tmp_path):
         """Test Atom feed with alternate link types."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <title>Atom Alternate Links</title>
   <entry>
@@ -197,10 +218,10 @@ def test_atom_with_alternate_link(self, tmp_path):
     <updated>2024-01-15T10:30:00Z</updated>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -208,10 +229,14 @@ def test_atom_with_alternate_link(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
         # feedparser should pick the alternate link
-        assert 'atom.example.com/article' in entry['url']
+        assert "atom.example.com/article" in entry["url"]
 
 
 class TestDateFormats:
@@ -219,8 +244,8 @@ class TestDateFormats:
 
     def test_rfc822_date(self, tmp_path):
         """Test RFC 822 date format (RSS 2.0 standard)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -230,10 +255,10 @@ def test_rfc822_date(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -241,15 +266,19 @@ def test_rfc822_date(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert 'bookmarked_at' in entry
-        assert '2020-01-15' in entry['bookmarked_at']
+        assert "bookmarked_at" in entry
+        assert "2020-01-15" in entry["bookmarked_at"]
 
     def test_iso8601_date(self, tmp_path):
         """Test ISO 8601 date format (Atom standard)."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <entry>
     <title>ISO 8601 Date</title>
@@ -257,10 +286,10 @@ def test_iso8601_date(self, tmp_path):
     <published>2024-01-15T10:30:45.123Z</published>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -268,15 +297,19 @@ def test_iso8601_date(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert 'bookmarked_at' in entry
-        assert '2024-01-15' in entry['bookmarked_at']
+        assert "bookmarked_at" in entry
+        assert "2024-01-15" in entry["bookmarked_at"]
 
     def test_updated_vs_published_date(self, tmp_path):
         """Test that published date is preferred over updated date."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <entry>
     <title>Date Priority Test</title>
@@ -285,10 +318,10 @@ def test_updated_vs_published_date(self, tmp_path):
     <updated>2024-01-15T10:00:00Z</updated>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -296,15 +329,19 @@ def test_updated_vs_published_date(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
         # Should use published date (Jan 10) not updated date (Jan 15)
-        assert '2024-01-10' in entry['bookmarked_at']
+        assert "2024-01-10" in entry["bookmarked_at"]
 
     def test_only_updated_date(self, tmp_path):
         """Test fallback to updated date when published is missing."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <entry>
     <title>Only Updated</title>
@@ -312,10 +349,10 @@ def test_only_updated_date(self, tmp_path):
     <updated>2024-01-20T10:00:00Z</updated>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -323,14 +360,18 @@ def test_only_updated_date(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert '2024-01-20' in entry['bookmarked_at']
+        assert "2024-01-20" in entry["bookmarked_at"]
 
     def test_no_date(self, tmp_path):
         """Test entries without any date."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -339,10 +380,10 @@ def test_no_date(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -350,9 +391,13 @@ def test_no_date(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert 'bookmarked_at' not in entry
+        assert "bookmarked_at" not in entry
 
 
 class TestTagsAndCategories:
@@ -360,8 +405,8 @@ class TestTagsAndCategories:
 
     def test_rss_categories(self, tmp_path):
         """Test RSS 2.0 category elements."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -373,10 +418,10 @@ def test_rss_categories(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -384,23 +429,29 @@ def test_rss_categories(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'Tech' in tag_names
-        assert 'Web' in tag_names
-        assert 'Programming' in tag_names
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
+
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
+        tag_names = {t["name"] for t in tags}
+        assert "Tech" in tag_names
+        assert "Web" in tag_names
+        assert "Programming" in tag_names
+
+        snapshots = [
+            json.loads(line)
+            for line in lines
+            if json.loads(line).get("type") == "Snapshot"
+        ]
         entry = snapshots[0]
-        tags_list = entry['tags'].split(',')
+        tags_list = entry["tags"].split(",")
         assert len(tags_list) == 3
 
     def test_atom_categories(self, tmp_path):
         """Test Atom category elements with various attributes."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <entry>
     <title>Atom Categories</title>
@@ -410,10 +461,10 @@ def test_atom_categories(self, tmp_path):
     <updated>2024-01-15T10:00:00Z</updated>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -421,18 +472,20 @@ def test_atom_categories(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
 
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
+        tag_names = {t["name"] for t in tags}
         # feedparser extracts the 'term' attribute
-        assert 'python' in tag_names
-        assert 'django' in tag_names
+        assert "python" in tag_names
+        assert "django" in tag_names
 
     def test_no_tags(self, tmp_path):
         """Test entries without tags."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -441,10 +494,10 @@ def test_no_tags(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -452,14 +505,18 @@ def test_no_tags(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert 'tags' not in entry or entry['tags'] == ''
+        assert "tags" not in entry or entry["tags"] == ""
 
     def test_duplicate_tags(self, tmp_path):
         """Test that duplicate tags are handled properly."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -471,10 +528,10 @@ def test_duplicate_tags(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -482,11 +539,13 @@ def test_duplicate_tags(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
         # Tag records should be unique
-        tag_names = [t['name'] for t in tags]
-        assert tag_names.count('Python') == 1
+        tag_names = [t["name"] for t in tags]
+        assert tag_names.count("Python") == 1
 
 
 class TestCustomNamespaces:
@@ -494,8 +553,8 @@ class TestCustomNamespaces:
 
     def test_dublin_core_metadata(self, tmp_path):
         """Test Dublin Core namespace fields."""
-        input_file = tmp_path / 'feed.rdf'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rdf"
+        input_file.write_text("""<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          xmlns="http://purl.org/rss/1.0/"
          xmlns:dc="http://purl.org/dc/elements/1.1/">
@@ -511,10 +570,10 @@ def test_dublin_core_metadata(self, tmp_path):
     <dc:rights>Copyright 2024</dc:rights>
   </item>
 </rdf:RDF>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -522,19 +581,25 @@ def test_dublin_core_metadata(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
 
-        assert entry['url'] == 'https://example.com/dc1'
-        assert entry['title'] == 'Dublin Core Article'
+        assert entry["url"] == "https://example.com/dc1"
+        assert entry["title"] == "Dublin Core Article"
         # feedparser should parse dc:date as bookmarked_at
-        assert 'bookmarked_at' in entry
+        assert "bookmarked_at" in entry
 
     def test_media_rss_namespace(self, tmp_path):
         """Test Media RSS namespace (common in podcast feeds)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
   <channel>
     <title>Media RSS Feed</title>
@@ -547,10 +612,10 @@ def test_media_rss_namespace(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -558,16 +623,20 @@ def test_media_rss_namespace(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'] == 'https://example.com/podcast/1'
-        assert entry['title'] == 'Podcast Episode 1'
+        assert entry["url"] == "https://example.com/podcast/1"
+        assert entry["title"] == "Podcast Episode 1"
 
     def test_itunes_namespace(self, tmp_path):
         """Test iTunes namespace (common in podcast feeds)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
   <channel>
     <title>iTunes Podcast</title>
@@ -581,10 +650,10 @@ def test_itunes_namespace(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -592,12 +661,18 @@ def test_itunes_namespace(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
 
-        assert entry['url'] == 'https://example.com/ep1'
-        assert entry['title'] == 'Episode 1: Getting Started'
+        assert entry["url"] == "https://example.com/ep1"
+        assert entry["title"] == "Episode 1: Getting Started"
 
 
 class TestEdgeCases:
@@ -605,8 +680,8 @@ class TestEdgeCases:
 
     def test_missing_title(self, tmp_path):
         """Test entries without title."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -615,10 +690,10 @@ def test_missing_title(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -626,16 +701,20 @@ def test_missing_title(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['url'] == 'https://example.com/notitle'
-        assert 'title' not in entry
+        assert entry["url"] == "https://example.com/notitle"
+        assert "title" not in entry
 
     def test_missing_link(self, tmp_path):
         """Test entries without link (should be skipped)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -648,10 +727,10 @@ def test_missing_link(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -659,17 +738,21 @@ def test_missing_link(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
         # Should only have the entry with a link
-        assert entry['url'] == 'https://example.com/haslink'
-        assert '1 URL' in result.stdout
+        assert entry["url"] == "https://example.com/haslink"
+        assert len(lines) == 1
 
     def test_html_entities_in_title(self, tmp_path):
         """Test HTML entities in titles are properly decoded."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -678,10 +761,10 @@ def test_html_entities_in_title(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -689,15 +772,19 @@ def test_html_entities_in_title(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert entry['title'] == 'Using <div> & <span> tags'
+        assert entry["title"] == "Using <div> & <span> tags"
 
     def test_special_characters_in_tags(self, tmp_path):
         """Test special characters in tags."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -709,10 +796,10 @@ def test_special_characters_in_tags(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -720,18 +807,20 @@ def test_special_characters_in_tags(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
 
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'C++' in tag_names
-        assert 'Node.js' in tag_names
-        assert 'Web/Mobile' in tag_names
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
+        tag_names = {t["name"] for t in tags}
+        assert "C++" in tag_names
+        assert "Node.js" in tag_names
+        assert "Web/Mobile" in tag_names
 
     def test_cdata_sections(self, tmp_path):
         """Test CDATA sections in titles and descriptions."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -741,10 +830,10 @@ def test_cdata_sections(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -752,17 +841,21 @@ def test_cdata_sections(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
         # feedparser should strip HTML tags
-        assert 'HTML' in entry['title']
-        assert entry['url'] == 'https://example.com/cdata'
+        assert "HTML" in entry["title"]
+        assert entry["url"] == "https://example.com/cdata"
 
     def test_relative_urls(self, tmp_path):
         """Test that relative URLs are preserved (feedparser handles them)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <link>https://example.com</link>
@@ -772,10 +865,10 @@ def test_relative_urls(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -783,16 +876,21 @@ def test_relative_urls(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
         # feedparser may convert relative to absolute, or leave as-is
-        assert 'article/relative' in entry['url']
+        assert "article/relative" in entry["url"]
 
     def test_unicode_characters(self, tmp_path):
         """Test Unicode characters in feed content."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text(
+            """<?xml version="1.0" encoding="UTF-8"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -803,10 +901,12 @@ def test_unicode_characters(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''', encoding='utf-8')
+        """,
+            encoding="utf-8",
+        )
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -814,18 +914,20 @@ def test_unicode_characters(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
 
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
-        assert '日本語' in entry['title']
-        assert 'Français' in entry['title']
+        assert "日本語" in entry["title"]
+        assert "Français" in entry["title"]
 
     def test_very_long_title(self, tmp_path):
         """Test handling of very long titles."""
-        long_title = 'A' * 1000
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text(f'''<?xml version="1.0"?>
+        long_title = "A" * 1000
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text(f"""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <item>
@@ -834,10 +936,10 @@ def test_very_long_title(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -845,51 +947,61 @@ def test_very_long_title(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert len(entry['title']) == 1000
-        assert entry['title'] == long_title
+        assert len(entry["title"]) == 1000
+        assert entry["title"] == long_title
 
     def test_multiple_entries_batch(self, tmp_path):
         """Test processing a large batch of entries."""
         items = []
         for i in range(100):
-            items.append(f'''
+            items.append(f"""
     <item>
       <title>Article {i}</title>
       <link>https://example.com/article/{i}</link>
       <category>Tag{i % 10}</category>
       <pubDate>Mon, {15 + (i % 15)} Jan 2024 10:00:00 GMT</pubDate>
     </item>
-            ''')
+            """)
 
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text(f'''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text(f"""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <title>Large Feed</title>
-    {''.join(items)}
+    {"".join(items)}
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
+        assert "urls.jsonl" in result.stderr or "urls.jsonl" in result.stdout
 
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
+        lines = [line for line in result.stdout.strip().split("\n") if line.strip()]
 
         # Should have 10 unique tags (Tag0-Tag9) + 100 snapshots
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
+        tags = [
+            json.loads(line) for line in lines if json.loads(line).get("type") == "Tag"
+        ]
+        snapshots = [
+            json.loads(line)
+            for line in lines
+            if json.loads(line).get("type") == "Snapshot"
+        ]
 
         assert len(tags) == 10
         assert len(snapshots) == 100
@@ -900,8 +1012,8 @@ class TestRealWorldFeeds:
 
     def test_medium_style_feed(self, tmp_path):
         """Test Medium-style feed structure."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <rss version="2.0">
   <channel>
     <title>Medium Feed</title>
@@ -916,10 +1028,10 @@ def test_medium_style_feed(self, tmp_path):
     </item>
   </channel>
 </rss>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -927,17 +1039,23 @@ def test_medium_style_feed(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
-        assert 'medium.com' in entry['url']
-        assert entry['title'] == 'Article Title'
+        assert "medium.com" in entry["url"]
+        assert entry["title"] == "Article Title"
 
     def test_reddit_style_feed(self, tmp_path):
         """Test Reddit-style feed structure."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.rss"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns="http://www.w3.org/2005/Atom">
   <title>Reddit Feed</title>
   <entry>
@@ -948,10 +1066,10 @@ def test_reddit_style_feed(self, tmp_path):
     <id>t3_abc123</id>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -959,16 +1077,22 @@ def test_reddit_style_feed(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
+
+        snapshots = [
+            json.loads(line) for line in lines if json.loads(line)["type"] == "Snapshot"
+        ]
         entry = snapshots[0]
-        assert 'reddit.com' in entry['url']
+        assert "reddit.com" in entry["url"]
 
     def test_youtube_style_feed(self, tmp_path):
         """Test YouTube-style feed structure."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
+        input_file = tmp_path / "feed.atom"
+        input_file.write_text("""<?xml version="1.0"?>
 <feed xmlns:yt="http://www.youtube.com/xml/schemas/2015"
       xmlns="http://www.w3.org/2005/Atom">
   <title>YouTube Channel</title>
@@ -980,10 +1104,10 @@ def test_youtube_style_feed(self, tmp_path):
     <yt:channelId>UCxxxxxxxx</yt:channelId>
   </entry>
 </feed>
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
@@ -991,12 +1115,16 @@ def test_youtube_style_feed(self, tmp_path):
 
         assert result.returncode == 0
         # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
 
-        assert 'youtube.com' in entry['url']
-        assert 'dQw4w9WgXcQ' in entry['url']
+        assert "youtube.com" in entry["url"]
+        assert "dQw4w9WgXcQ" in entry["url"]
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py b/abx_plugins/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py
index 21cff18..eb7afd3 100755
--- a/abx_plugins/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py
+++ b/abx_plugins/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py
@@ -23,37 +23,35 @@
 import os
 import re
 import sys
-from datetime import datetime, timezone
 from html import unescape
 from pathlib import Path
 from urllib.parse import urlparse
-from urllib.request import urlopen
 
 import rich_click as click
 
-PLUGIN_NAME = 'parse_txt_urls'
+PLUGIN_NAME = "parse_txt_urls"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-URLS_FILE = Path('urls.jsonl')
+URLS_FILE = Path("urls.jsonl")
 
 # URL regex from archivebox/misc/util.py
 # https://mathiasbynens.be/demo/url-regex
 URL_REGEX = re.compile(
-    r'(?=('
-    r'http[s]?://'                     # start matching from allowed schemes
-    r'(?:[a-zA-Z]|[0-9]'               # followed by allowed alphanum characters
-    r'|[-_$@.&+!*\(\),]'               #   or allowed symbols (keep hyphen first to match literal hyphen)
-    r'|[^\u0000-\u007F])+'             #   or allowed unicode bytes
-    r'[^\]\[<>"\'\s]+'                 # stop parsing at these symbols
-    r'))',
+    r"(?=("
+    r"http[s]?://"  # start matching from allowed schemes
+    r"(?:[a-zA-Z]|[0-9]"  # followed by allowed alphanum characters
+    r"|[-_$@.&+!*\(\),]"  #   or allowed symbols (keep hyphen first to match literal hyphen)
+    r"|[^\u0000-\u007F])+"  #   or allowed unicode bytes
+    r'[^\]\[<>"\'\s]+'  # stop parsing at these symbols
+    r"))",
     re.IGNORECASE | re.UNICODE,
 )
 
 
-def parens_are_matched(string: str, open_char='(', close_char=')') -> bool:
+def parens_are_matched(string: str, open_char="(", close_char=")") -> bool:
     """Check that all parentheses in a string are balanced and nested properly."""
     count = 0
     for c in string:
@@ -94,41 +92,49 @@ def fetch_content(url: str) -> str:
     """Fetch content from a URL (supports file:// and https://)."""
     parsed = urlparse(url)
 
-    if parsed.scheme == 'file':
+    if parsed.scheme == "file":
         # Local file
         file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     else:
         # Remote URL
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
+        timeout = int(os.environ.get("TIMEOUT", "60"))
+        user_agent = os.environ.get(
+            "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+        )
 
         import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
+
+        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
         with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
+            return response.read().decode("utf-8", errors="replace")
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to parse (file:// or https://)')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
+@click.option("--url", required=True, help="URL to parse (file:// or https://)")
+@click.option("--snapshot-id", required=False, help="Parent Snapshot UUID")
+@click.option("--crawl-id", required=False, help="Crawl UUID")
+@click.option("--depth", type=int, default=0, help="Current depth level")
+def main(
+    url: str,
+    snapshot_id: str | None = None,
+    crawl_id: str | None = None,
+    depth: int = 0,
+):
     """Parse plain text and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
+    env_depth = os.environ.get("SNAPSHOT_DEPTH")
     if env_depth is not None:
         try:
             depth = int(env_depth)
         except Exception:
             pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
+    crawl_id = crawl_id or os.environ.get("CRAWL_ID")
 
     try:
         content = fetch_content(url)
     except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
+        click.echo(f"Failed to fetch {url}: {e}", err=True)
         sys.exit(1)
 
     urls_found = set()
@@ -142,26 +148,28 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     records = []
     for found_url in sorted(urls_found):
         record = {
-            'type': 'Snapshot',
-            'url': found_url,
-            'plugin': PLUGIN_NAME,
-            'depth': depth + 1,
+            "type": "Snapshot",
+            "url": found_url,
+            "plugin": PLUGIN_NAME,
+            "depth": depth + 1,
         }
         if snapshot_id:
-            record['parent_snapshot_id'] = snapshot_id
+            record["parent_snapshot_id"] = snapshot_id
         if crawl_id:
-            record['crawl_id'] = crawl_id
+            record["crawl_id"] = crawl_id
         records.append(record)
         print(json.dumps(record))
 
     # Emit ArchiveResult record to mark completion
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in records) + ('\n' if records else ''))
-    status = 'succeeded' if urls_found else 'skipped'
+    URLS_FILE.write_text(
+        "\n".join(json.dumps(r) for r in records) + ("\n" if records else "")
+    )
+    status = "succeeded" if urls_found else "skipped"
     output_str = URLS_FILE.name
     ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output_str,
     }
     print(json.dumps(ar_record))
 
@@ -169,5 +177,5 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/parse_txt_urls/tests/test_parse_txt_urls.py b/abx_plugins/plugins/parse_txt_urls/tests/test_parse_txt_urls.py
index a3b5328..93ba48d 100644
--- a/abx_plugins/plugins/parse_txt_urls/tests/test_parse_txt_urls.py
+++ b/abx_plugins/plugins/parse_txt_urls/tests/test_parse_txt_urls.py
@@ -9,7 +9,7 @@
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_txt_urls.*'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob("on_Snapshot__*_parse_txt_urls.*"), None)
 
 
 class TestParseTxtUrls:
@@ -17,38 +17,42 @@ class TestParseTxtUrls:
 
     def test_extracts_urls_including_real_example_com(self, tmp_path):
         """Test extracting URLs from plain text including real example.com."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('''
+        input_file = tmp_path / "urls.txt"
+        input_file.write_text("""
 https://example.com
 https://example.com/page
 https://www.iana.org/domains/reserved
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0, f"Failed: {result.stderr}"
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
 
         # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip() and '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 3
 
         urls = set()
         for line in lines:
             entry = json.loads(line)
-            assert entry['type'] == 'Snapshot'
-            assert 'url' in entry
-            urls.add(entry['url'])
+            assert entry["type"] == "Snapshot"
+            assert "url" in entry
+            urls.add(entry["url"])
 
         # Verify real URLs are extracted correctly
-        assert 'https://example.com' in urls
-        assert 'https://example.com/page' in urls
-        assert 'https://www.iana.org/domains/reserved' in urls
+        assert "https://example.com" in urls
+        assert "https://example.com/page" in urls
+        assert "https://www.iana.org/domains/reserved" in urls
 
         # Verify ArchiveResult record
         assert '"type": "ArchiveResult"' in result.stdout
@@ -56,138 +60,158 @@ def test_extracts_urls_including_real_example_com(self, tmp_path):
 
     def test_extracts_urls_from_mixed_content(self, tmp_path):
         """Test extracting URLs embedded in prose text."""
-        input_file = tmp_path / 'mixed.txt'
-        input_file.write_text('''
+        input_file = tmp_path / "mixed.txt"
+        input_file.write_text("""
 Check out this great article at https://blog.example.com/post
 You can also visit http://docs.test.org for more info.
 Also see https://github.com/user/repo for the code.
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
+        urls = {json.loads(line)["url"] for line in lines}
 
-        assert 'https://blog.example.com/post' in urls
-        assert 'http://docs.test.org' in urls
-        assert 'https://github.com/user/repo' in urls
+        assert "https://blog.example.com/post" in urls
+        assert "http://docs.test.org" in urls
+        assert "https://github.com/user/repo" in urls
 
     def test_handles_markdown_urls(self, tmp_path):
         """Test handling URLs in markdown format with parentheses."""
-        input_file = tmp_path / 'markdown.txt'
-        input_file.write_text('''
+        input_file = tmp_path / "markdown.txt"
+        input_file.write_text("""
 [Example](https://example.com/page)
 [Wiki](https://en.wikipedia.org/wiki/Article_(Disambiguation))
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
+        urls = {json.loads(line)["url"] for line in lines}
 
-        assert 'https://example.com/page' in urls
-        assert any('wikipedia.org' in u for u in urls)
+        assert "https://example.com/page" in urls
+        assert any("wikipedia.org" in u for u in urls)
 
     def test_skips_when_no_urls_found(self, tmp_path):
         """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'empty.txt'
-        input_file.write_text('no urls here, just plain text')
+        input_file = tmp_path / "empty.txt"
+        input_file.write_text("no urls here, just plain text")
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
+        assert "urls.jsonl" in result.stderr
         assert '"status": "skipped"' in result.stdout
 
     def test_exits_1_when_file_not_found(self, tmp_path):
         """Test that script exits with code 1 when file doesn't exist."""
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/path.txt'],
+            [sys.executable, str(SCRIPT_PATH), "--url", "file:///nonexistent/path.txt"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
+        assert "Failed to fetch" in result.stderr
 
     def test_deduplicates_urls(self, tmp_path):
         """Test that duplicate URLs are deduplicated."""
-        input_file = tmp_path / 'dupes.txt'
-        input_file.write_text('''
+        input_file = tmp_path / "dupes.txt"
+        input_file.write_text("""
 https://example.com
 https://example.com
 https://example.com
 https://other.com
-        ''')
+        """)
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
     def test_outputs_to_stdout(self, tmp_path):
         """Test that output goes to stdout in JSONL format."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('https://new.com\nhttps://other.com')
+        input_file = tmp_path / "urls.txt"
+        input_file.write_text("https://new.com\nhttps://other.com")
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         assert len(lines) == 2
 
-        urls = {json.loads(line)['url'] for line in lines}
-        assert 'https://new.com' in urls
-        assert 'https://other.com' in urls
+        urls = {json.loads(line)["url"] for line in lines}
+        assert "https://new.com" in urls
+        assert "https://other.com" in urls
 
     def test_output_is_valid_json(self, tmp_path):
         """Test that output contains required fields."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('https://example.com')
+        input_file = tmp_path / "urls.txt"
+        input_file.write_text("https://example.com")
 
         result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
+            [sys.executable, str(SCRIPT_PATH), "--url", f"file://{input_file}"],
             cwd=tmp_path,
             capture_output=True,
             text=True,
         )
 
         assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
+        lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if '"type": "Snapshot"' in line
+        ]
         entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['type'] == 'Snapshot'
-        assert entry['plugin'] == 'parse_txt_urls'
+        assert entry["url"] == "https://example.com"
+        assert entry["type"] == "Snapshot"
+        assert entry["plugin"] == "parse_txt_urls"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/path_utils.py b/abx_plugins/plugins/path_utils.py
index 4180d71..8c23361 100644
--- a/abx_plugins/plugins/path_utils.py
+++ b/abx_plugins/plugins/path_utils.py
@@ -13,10 +13,10 @@ def get_lib_dir() -> Path:
 
     Priority: LIB_DIR env var, otherwise ~/.config/abx/lib.
     """
-    lib_dir = os.environ.get('LIB_DIR', '').strip()
+    lib_dir = os.environ.get("LIB_DIR", "").strip()
     if lib_dir:
         return _resolve_path(lib_dir)
-    return _resolve_path(str(Path.home() / '.config' / 'abx' / 'lib'))
+    return _resolve_path(str(Path.home() / ".config" / "abx" / "lib"))
 
 
 def get_personas_dir() -> Path:
@@ -24,7 +24,7 @@ def get_personas_dir() -> Path:
 
     Priority: PERSONAS_DIR env var, otherwise ~/.config/abx/personas.
     """
-    personas_dir = os.environ.get('PERSONAS_DIR', '').strip()
+    personas_dir = os.environ.get("PERSONAS_DIR", "").strip()
     if personas_dir:
         return _resolve_path(personas_dir)
-    return _resolve_path(str(Path.home() / '.config' / 'abx' / 'personas'))
+    return _resolve_path(str(Path.home() / ".config" / "abx" / "personas"))
diff --git a/abx_plugins/plugins/pdf/on_Snapshot__52_pdf.js b/abx_plugins/plugins/pdf/on_Snapshot__52_pdf.js
index 8f4a5ba..51ac3de 100644
--- a/abx_plugins/plugins/pdf/on_Snapshot__52_pdf.js
+++ b/abx_plugins/plugins/pdf/on_Snapshot__52_pdf.js
@@ -18,8 +18,11 @@ if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_
 
 const {
     getEnvBool,
+    getEnvInt,
     parseArgs,
     readCdpUrl,
+    connectToPage,
+    waitForPageLoaded,
 } = require('../chrome/chrome_utils.js');
 
 // Check if PDF is enabled BEFORE requiring puppeteer
@@ -64,48 +67,26 @@ function hasStaticFileOutput() {
     return false;
 }
 
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-async function printToPdf(url) {
+async function printToPdf(url, timeoutMs) {
     // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;
-    let page = null;
 
     try {
-        // Connect to existing Chrome session (required)
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
+        if (!readCdpUrl(CHROME_SESSION_DIR)) {
             return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
         }
 
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-            defaultViewport: null,
+        const connection = await connectToPage({
+            chromeSessionDir: CHROME_SESSION_DIR,
+            timeoutMs,
+            puppeteer,
         });
+        browser = connection.browser;
+        const page = connection.page;
 
-        // Get existing pages or create new one
-        const pages = await browser.pages();
-        page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            page = await browser.newPage();
-        }
+        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
 
         // Print to PDF
         await page.pdf({
@@ -158,18 +139,9 @@ async function main() {
             process.exit(0);
         }
 
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            throw new Error('No Chrome session found (chrome plugin must run first)');
-        }
-
-        // Wait for page to be fully loaded
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
+        const timeoutMs = getEnvInt('PDF_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
 
-        const result = await printToPdf(url);
+        const result = await printToPdf(url, timeoutMs);
 
         if (result.success) {
             // Success - emit ArchiveResult
diff --git a/abx_plugins/plugins/pdf/tests/test_pdf.py b/abx_plugins/plugins/pdf/tests/test_pdf.py
index 48efab0..4b72e86 100644
--- a/abx_plugins/plugins/pdf/tests/test_pdf.py
+++ b/abx_plugins/plugins/pdf/tests/test_pdf.py
@@ -13,30 +13,30 @@
 """
 
 import json
-import os
 import subprocess
-import sys
 import tempfile
 from pathlib import Path
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
     get_plugin_dir,
     get_hook_script,
-    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
     PLUGINS_ROOT,
     chrome_session,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-PDF_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_pdf.*')
-NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__install_using_npm_provider.py'
-TEST_URL = 'https://example.com'
+_PDF_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_pdf.*")
+if _PDF_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+PDF_HOOK = _PDF_HOOK
+NPM_PROVIDER_HOOK = PLUGINS_ROOT / "npm" / "on_Binary__install_using_npm_provider.py"
+TEST_URL = "https://example.com"
 
 
 def test_hook_script_exists():
@@ -46,46 +46,54 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
+    from abx_pkg import Binary, EnvProvider
 
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
     assert node_loaded and node_loaded.abspath, "Node.js required for pdf plugin"
 
 
-def test_extracts_pdf_from_example_com():
-    """Test full workflow: extract PDF from real example.com via hook."""
+def test_extracts_pdf_from_example_com(chrome_test_url):
+    """Test full workflow: extract PDF from deterministic local fixture via hook."""
     # Prerequisites checked by earlier test
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            pdf_dir = snapshot_chrome_dir.parent / 'pdf'
+        with chrome_session(tmpdir, test_url=chrome_test_url, timeout=30) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            pdf_dir = snapshot_chrome_dir.parent / "pdf"
             pdf_dir.mkdir(exist_ok=True)
 
             # Run PDF extraction hook
             result = subprocess.run(
-                ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+                [
+                    "node",
+                    str(PDF_HOOK),
+                    f"--url={chrome_test_url}",
+                    "--snapshot-id=test789",
+                ],
                 cwd=pdf_dir,
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
 
         # Parse clean JSONL output (hook might fail due to network issues)
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
@@ -94,104 +102,129 @@ def test_extracts_pdf_from_example_com():
         assert result_json, "Should have ArchiveResult JSONL output"
 
         # Skip verification if network failed
-        if result_json['status'] != 'succeeded':
+        if result_json["status"] != "succeeded":
             pass
-            if 'TIMED_OUT' in result_json.get('output_str', '') or 'timeout' in result_json.get('output_str', '').lower():
+            if (
+                "TIMED_OUT" in result_json.get("output_str", "")
+                or "timeout" in result_json.get("output_str", "").lower()
+            ):
                 pass
             pytest.fail(f"Extraction failed: {result_json}")
 
         assert result.returncode == 0, f"Should exit 0 on success: {result.stderr}"
 
         # Verify filesystem output (hook writes to current directory)
-        pdf_file = pdf_dir / 'output.pdf'
+        pdf_file = pdf_dir / "output.pdf"
         assert pdf_file.exists(), "output.pdf not created"
 
         # Verify file is valid PDF
         file_size = pdf_file.stat().st_size
         assert file_size > 500, f"PDF too small: {file_size} bytes"
-        assert file_size < 10 * 1024 * 1024, f"PDF suspiciously large: {file_size} bytes"
+        assert file_size < 10 * 1024 * 1024, (
+            f"PDF suspiciously large: {file_size} bytes"
+        )
 
         # Check PDF magic bytes
         pdf_data = pdf_file.read_bytes()
-        assert pdf_data[:4] == b'%PDF', "Should be valid PDF file"
+        assert pdf_data[:4] == b"%PDF", "Should be valid PDF file"
 
 
 def test_config_save_pdf_false_skips():
     """Test that PDF_ENABLED=False exits without emitting JSONL."""
-    import os
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['PDF_ENABLED'] = 'False'
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["PDF_ENABLED"] = "False"
 
         result = subprocess.run(
-            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
+            ["node", str(PDF_HOOK), f"--url={TEST_URL}", "--snapshot-id=test999"],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_reports_missing_chrome():
     """Test that script reports error when Chrome session is missing."""
-    import os
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
-        pdf_dir = snap_dir / 'pdf'
+        snap_dir = tmpdir / "snap"
+        pdf_dir = snap_dir / "pdf"
         pdf_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
 
         result = subprocess.run(
-            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
+            ["node", str(PDF_HOOK), f"--url={TEST_URL}", "--snapshot-id=test123"],
             cwd=pdf_dir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         assert result.returncode != 0, "Should fail without shared Chrome session"
         combined = result.stdout + result.stderr
-        assert 'chrome session' in combined.lower() or 'chrome plugin' in combined.lower()
+        assert (
+            "chrome session" in combined.lower() or "chrome plugin" in combined.lower()
+        )
 
 
-def test_runs_with_shared_chrome_session():
+def test_runs_with_shared_chrome_session(chrome_test_url):
     """Test that PDF hook completes when shared Chrome session is available."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            pdf_dir = snapshot_chrome_dir.parent / 'pdf'
+        with chrome_session(tmpdir, test_url=chrome_test_url, timeout=30) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            pdf_dir = snapshot_chrome_dir.parent / "pdf"
             pdf_dir.mkdir(exist_ok=True)
 
             result = subprocess.run(
-                ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
+                [
+                    "node",
+                    str(PDF_HOOK),
+                    f"--url={chrome_test_url}",
+                    "--snapshot-id=testtimeout",
+                ],
                 cwd=pdf_dir,
                 capture_output=True,
                 text=True,
                 env=env,
-                timeout=30
+                timeout=30,
             )
 
         # Should complete (success or fail, but not hang)
         assert result.returncode in (0, 1), "Should complete without hanging"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/pip/on_Binary__11_pip_install.py b/abx_plugins/plugins/pip/on_Binary__11_pip_install.py
index 31795e4..f014fa2 100755
--- a/abx_plugins/plugins/pip/on_Binary__11_pip_install.py
+++ b/abx_plugins/plugins/pip/on_Binary__11_pip_install.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env -S uv run --script
 # /// script
-# requires-python = ">=3.12"
+# requires-python = ">=3.11"
 # dependencies = [
 #   "click",
 #   "rich-click",
@@ -24,47 +24,52 @@
 from pathlib import Path
 
 import rich_click as click
-from abx_pkg import Binary, PipProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-PipProvider.model_rebuild()
+from abx_pkg import Binary, PipProvider
 
 
 @click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
+@click.option("--binary-id", required=True, help="Binary UUID")
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict")
+def main(
+    binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None
+):
     """Install binary using pip."""
 
     # Check if pip provider is allowed
-    if binproviders != '*' and 'pip' not in binproviders.split(','):
+    if binproviders != "*" and "pip" not in binproviders.split(","):
         click.echo(f"pip provider not allowed for {name}", err=True)
         sys.exit(0)
 
     # Get LIB_DIR from environment (optional)
-    lib_dir = os.environ.get('LIB_DIR', '').strip()
+    lib_dir = os.environ.get("LIB_DIR", "").strip()
     if not lib_dir:
-        lib_dir = str(Path.home() / '.config' / 'abx' / 'lib')
+        lib_dir = str(Path.home() / ".config" / "abx" / "lib")
 
     # Structure: lib/arm64-darwin/pip/venv (PipProvider will create venv automatically)
-    pip_venv_path = Path(lib_dir) / 'pip' / 'venv'
+    pip_venv_path = Path(lib_dir) / "pip" / "venv"
     pip_venv_path.parent.mkdir(parents=True, exist_ok=True)
-    venv_python = pip_venv_path / 'bin' / 'python'
+    venv_python = pip_venv_path / "bin" / "python"
 
     # Prefer a stable system python for venv creation if provided/available
-    preferred_python = os.environ.get('PIP_VENV_PYTHON', '').strip()
+    preferred_python = os.environ.get("PIP_VENV_PYTHON", "").strip()
     if not preferred_python:
-        for candidate in ('python3.12', 'python3.11', 'python3.10'):
+        for candidate in (
+            "python3.14",
+            "python3.13",
+            "python3.12",
+            "python3.11",
+            "python3.10",
+        ):
             if shutil.which(candidate):
                 preferred_python = candidate
                 break
     if preferred_python and not venv_python.exists():
         try:
             subprocess.run(
-                [preferred_python, '-m', 'venv', str(pip_venv_path), '--upgrade-deps'],
+                [preferred_python, "-m", "venv", str(pip_venv_path), "--upgrade-deps"],
                 check=True,
             )
         except Exception:
@@ -86,12 +91,18 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
             try:
                 overrides_dict = json.loads(overrides)
                 # Extract pip-specific overrides
-                overrides_dict = overrides_dict.get('pip', {})
+                overrides_dict = overrides_dict.get("pip", {})
                 click.echo(f"Using pip install overrides: {overrides_dict}", err=True)
             except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides={'pip': overrides_dict} if overrides_dict else {}).install()
+                click.echo(
+                    f"Warning: Failed to parse overrides JSON: {overrides}", err=True
+                )
+
+        binary = Binary(
+            name=name,
+            binproviders=[provider],
+            overrides={"pip": overrides_dict} if overrides_dict else {},
+        ).install()
     except Exception as e:
         click.echo(f"pip install failed: {e}", err=True)
         sys.exit(1)
@@ -102,30 +113,34 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
 
     # Output Binary JSONL record to stdout
     record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'pip',
+        "type": "Binary",
+        "name": name,
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": "pip",
     }
     print(json.dumps(record))
 
     # Emit PATH update for pip bin dir
-    pip_bin_dir = str(pip_venv_path / 'bin')
-    current_path = os.environ.get('PATH', '')
+    pip_bin_dir = str(pip_venv_path / "bin")
+    current_path = os.environ.get("PATH", "")
 
     # Check if pip_bin_dir is already in PATH
-    path_dirs = current_path.split(':')
+    path_dirs = current_path.split(":")
     new_path = f"{pip_bin_dir}:{current_path}" if current_path else pip_bin_dir
     if pip_bin_dir in path_dirs:
         new_path = current_path
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'PATH': new_path,
-        },
-    }))
+    print(
+        json.dumps(
+            {
+                "type": "Machine",
+                "config": {
+                    "PATH": new_path,
+                },
+            }
+        )
+    )
 
     # Log human-readable info to stderr
     click.echo(f"Installed {name} at {binary.abspath}", err=True)
@@ -134,5 +149,5 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/pip/tests/test_pip_provider.py b/abx_plugins/plugins/pip/tests/test_pip_provider.py
index a825dc6..ba4d1b7 100644
--- a/abx_plugins/plugins/pip/tests/test_pip_provider.py
+++ b/abx_plugins/plugins/pip/tests/test_pip_provider.py
@@ -14,14 +14,13 @@
 import sys
 import tempfile
 from pathlib import Path
-from unittest.mock import patch, MagicMock
 
 import pytest
 
 
 # Get the path to the pip provider hook
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_pip_install.py'), None)
+INSTALL_HOOK = next(PLUGIN_DIR.glob("on_Binary__*_pip_install.py"), None)
 
 
 class TestPipProviderHook:
@@ -30,12 +29,13 @@ class TestPipProviderHook:
     def setup_method(self, _method=None):
         """Set up test environment."""
         self.temp_dir = tempfile.mkdtemp()
-        self.output_dir = Path(self.temp_dir) / 'output'
+        self.output_dir = Path(self.temp_dir) / "output"
         self.output_dir.mkdir()
 
     def teardown_method(self, _method=None):
         """Clean up."""
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_hook_script_exists(self):
@@ -45,55 +45,56 @@ def test_hook_script_exists(self):
     def test_hook_help(self):
         """Hook should accept --help without error."""
         result = subprocess.run(
-            [sys.executable, str(INSTALL_HOOK), '--help'],
+            [sys.executable, str(INSTALL_HOOK), "--help"],
             capture_output=True,
             text=True,
-            timeout=30
+            timeout=30,
         )
         # May succeed or fail depending on implementation
         # At minimum should not crash with Python error
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
 
     def test_hook_finds_pip(self):
         """Hook should find pip binary."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["SNAP_DIR"] = self.temp_dir
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=pip',
-                '--binproviders=pip',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=pip",
+                "--binproviders=pip",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             cwd=str(self.output_dir),
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Check for JSONL output
         jsonl_found = False
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'pip':
+                    if record.get("type") == "Binary" and record.get("name") == "pip":
                         jsonl_found = True
                         # Verify structure
-                        assert 'abspath' in record
-                        assert 'version' in record
+                        assert "abspath" in record
+                        assert "version" in record
                         break
                 except json.JSONDecodeError:
                     continue
 
         # Should not crash
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
 
         # Should find pip via pip provider
         assert jsonl_found, "Expected to find pip binary in JSONL output"
@@ -101,27 +102,28 @@ def test_hook_finds_pip(self):
     def test_hook_unknown_package(self):
         """Hook should handle unknown packages gracefully."""
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["SNAP_DIR"] = self.temp_dir
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_package_xyz123',
-                '--binproviders=pip',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=nonexistent_package_xyz123",
+                "--binproviders=pip",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             cwd=str(self.output_dir),
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Should not crash
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
         # May have non-zero exit code for missing package
 
 
@@ -131,60 +133,64 @@ class TestPipProviderIntegration:
     def setup_method(self, _method=None):
         """Set up test environment."""
         self.temp_dir = tempfile.mkdtemp()
-        self.output_dir = Path(self.temp_dir) / 'output'
+        self.output_dir = Path(self.temp_dir) / "output"
         self.output_dir.mkdir()
 
     def teardown_method(self, _method=None):
         """Clean up."""
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_hook_finds_pip_installed_binary(self):
         """Hook should find binaries installed via pip."""
         pip_check = subprocess.run(
-            [sys.executable, '-m', 'pip', '--version'],
+            [sys.executable, "-m", "pip", "--version"],
             capture_output=True,
             text=True,
         )
         assert pip_check.returncode == 0, "pip not available"
         env = os.environ.copy()
-        env['SNAP_DIR'] = self.temp_dir
-        env['HOME'] = self.temp_dir
-        env.pop('LIB_DIR', None)
+        env["SNAP_DIR"] = self.temp_dir
+        env["HOME"] = self.temp_dir
+        env.pop("LIB_DIR", None)
 
         # Try to find 'pip' itself which should be available
         result = subprocess.run(
             [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=pip',
-                '--binproviders=pip,env',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
+                sys.executable,
+                str(INSTALL_HOOK),
+                "--name=pip",
+                "--binproviders=pip,env",
+                "--binary-id=test-uuid",
+                "--machine-id=test-machine",
             ],
             capture_output=True,
             text=True,
             cwd=str(self.output_dir),
             env=env,
-            timeout=60
+            timeout=60,
         )
 
         # Look for success in output
-        for line in result.stdout.split('\n'):
+        for line in result.stdout.split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'Binary' and 'pip' in record.get('name', ''):
+                    if record.get("type") == "Binary" and "pip" in record.get(
+                        "name", ""
+                    ):
                         # Found pip binary
-                        assert record.get('abspath')
+                        assert record.get("abspath")
                         return
                 except json.JSONDecodeError:
                     continue
 
         # If we get here without finding pip, that's acceptable
         # as long as the hook didn't crash
-        assert 'Traceback' not in result.stderr
+        assert "Traceback" not in result.stderr
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/puppeteer/on_Binary__12_puppeteer_install.py b/abx_plugins/plugins/puppeteer/on_Binary__12_puppeteer_install.py
index 44b960e..2b633c7 100755
--- a/abx_plugins/plugins/puppeteer/on_Binary__12_puppeteer_install.py
+++ b/abx_plugins/plugins/puppeteer/on_Binary__12_puppeteer_install.py
@@ -16,62 +16,93 @@
 import json
 import os
 import re
+import shutil
 import sys
 from pathlib import Path
 
 import rich_click as click
-from abx_pkg import Binary, EnvProvider, NpmProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-NpmProvider.model_rebuild()
+from abx_pkg import Binary, EnvProvider, NpmProvider
 
 
 @click.command()
-@click.option('--machine-id', required=True, help='Machine UUID')
-@click.option('--binary-id', required=True, help='Binary UUID')
-@click.option('--name', required=True, help='Binary name to install')
-@click.option('--binproviders', default='*', help='Allowed providers (comma-separated)')
-@click.option('--overrides', default=None, help='JSON-encoded overrides dict')
-def main(machine_id: str, binary_id: str, name: str, binproviders: str, overrides: str | None) -> None:
-    if binproviders != '*' and 'puppeteer' not in binproviders.split(','):
+@click.option("--machine-id", required=True, help="Machine UUID")
+@click.option("--binary-id", required=True, help="Binary UUID")
+@click.option("--name", required=True, help="Binary name to install")
+@click.option("--binproviders", default="*", help="Allowed providers (comma-separated)")
+@click.option("--overrides", default=None, help="JSON-encoded overrides dict")
+def main(
+    machine_id: str, binary_id: str, name: str, binproviders: str, overrides: str | None
+) -> None:
+    if binproviders != "*" and "puppeteer" not in binproviders.split(","):
         sys.exit(0)
 
-    if name not in ('chromium', 'chrome'):
+    if name not in ("chromium", "chrome"):
         sys.exit(0)
 
-    lib_dir = os.environ.get('LIB_DIR', '').strip()
+    lib_dir = os.environ.get("LIB_DIR", "").strip()
     if not lib_dir:
-        lib_dir = str(Path.home() / '.config' / 'abx' / 'lib')
+        lib_dir = str(Path.home() / ".config" / "abx" / "lib")
 
-    npm_prefix = Path(lib_dir) / 'npm'
+    npm_prefix = Path(lib_dir) / "npm"
     npm_prefix.mkdir(parents=True, exist_ok=True)
     npm_provider = NpmProvider(npm_prefix=npm_prefix)
-    cache_dir = Path(lib_dir) / 'puppeteer'
+    cache_dir = Path(lib_dir) / "puppeteer"
     cache_dir.mkdir(parents=True, exist_ok=True)
-    os.environ.setdefault('PUPPETEER_CACHE_DIR', str(cache_dir))
+    os.environ.setdefault("PUPPETEER_CACHE_DIR", str(cache_dir))
+
+    # Fast-path: if CHROME_BINARY is already available in env, reuse it and avoid
+    # a full `puppeteer browsers install` call for this invocation.
+    existing_chrome_binary = os.environ.get("CHROME_BINARY", "").strip()
+    if existing_chrome_binary:
+        existing_binary = _load_binary_from_path(existing_chrome_binary)
+        if existing_binary and existing_binary.abspath:
+            _emit_chromium_binary_record(
+                binary=existing_binary,
+                machine_id=machine_id,
+                binary_id=binary_id,
+            )
+            print(
+                json.dumps(
+                    {
+                        "type": "Machine",
+                        "config": {
+                            "CHROME_BINARY": str(existing_binary.abspath),
+                            "CHROMIUM_VERSION": str(existing_binary.version)
+                            if existing_binary.version
+                            else "",
+                        },
+                    }
+                )
+            )
+            sys.exit(0)
 
     puppeteer_binary = Binary(
-        name='puppeteer',
+        name="puppeteer",
         binproviders=[npm_provider, EnvProvider()],
-        overrides={'npm': {'packages': ['puppeteer']}},
+        overrides={"npm": {"packages": ["puppeteer"]}},
     ).load()
 
     if not puppeteer_binary.abspath:
-        click.echo('ERROR: puppeteer binary not found (install puppeteer first)', err=True)
+        click.echo(
+            "ERROR: puppeteer binary not found (install puppeteer first)", err=True
+        )
         sys.exit(1)
 
-    install_args = _parse_override_packages(overrides, default=['chromium@latest', '--install-deps'])
-    cmd = ['browsers', 'install', *install_args]
-    proc = puppeteer_binary.exec(cmd=cmd, timeout=300)
+    install_args = _parse_override_packages(
+        overrides, default=["chromium@latest", "--install-deps"]
+    )
+    proc = _run_puppeteer_install(
+        binary=puppeteer_binary, install_args=install_args, cache_dir=cache_dir
+    )
     if proc.returncode != 0:
         click.echo(proc.stdout.strip(), err=True)
         click.echo(proc.stderr.strip(), err=True)
-        click.echo(f'ERROR: puppeteer install failed ({proc.returncode})', err=True)
+        click.echo(f"ERROR: puppeteer install failed ({proc.returncode})", err=True)
         sys.exit(1)
 
-    chromium_binary = _load_chromium_binary(proc.stdout + '\n' + proc.stderr)
+    chromium_binary = _load_chromium_binary(proc.stdout + "\n" + proc.stderr)
     if not chromium_binary or not chromium_binary.abspath:
-        click.echo('ERROR: failed to locate Chromium after install', err=True)
+        click.echo("ERROR: failed to locate Chromium after install", err=True)
         sys.exit(1)
 
     _emit_chromium_binary_record(
@@ -81,14 +112,20 @@ def main(machine_id: str, binary_id: str, name: str, binproviders: str, override
     )
 
     config_patch = {
-        'CHROME_BINARY': str(chromium_binary.abspath),
-        'CHROMIUM_VERSION': str(chromium_binary.version) if chromium_binary.version else '',
+        "CHROME_BINARY": str(chromium_binary.abspath),
+        "CHROMIUM_VERSION": str(chromium_binary.version)
+        if chromium_binary.version
+        else "",
     }
 
-    print(json.dumps({
-        'type': 'Machine',
-        'config': config_patch,
-    }))
+    print(
+        json.dumps(
+            {
+                "type": "Machine",
+                "config": config_patch,
+            }
+        )
+    )
 
     sys.exit(0)
 
@@ -102,9 +139,9 @@ def _parse_override_packages(overrides: str | None, default: list[str]) -> list[
         return default
 
     if isinstance(overrides_dict, dict):
-        provider_overrides = overrides_dict.get('puppeteer')
+        provider_overrides = overrides_dict.get("puppeteer")
         if isinstance(provider_overrides, dict):
-            packages = provider_overrides.get('packages')
+            packages = provider_overrides.get("packages")
             if isinstance(packages, list) and packages:
                 return [str(arg) for arg in packages]
         if isinstance(provider_overrides, list) and provider_overrides:
@@ -115,54 +152,126 @@ def _parse_override_packages(overrides: str | None, default: list[str]) -> list[
     return default
 
 
-def _emit_chromium_binary_record(binary: Binary, machine_id: str, binary_id: str) -> None:
+def _run_puppeteer_install(binary: Binary, install_args: list[str], cache_dir: Path):
+    cmd = ["browsers", "install", *install_args]
+    proc = binary.exec(cmd=cmd, timeout=300)
+    if proc.returncode == 0:
+        return proc
+
+    install_output = f"{proc.stdout}\n{proc.stderr}"
+    if not _cleanup_partial_chromium_cache(install_output, cache_dir):
+        return proc
+
+    return binary.exec(cmd=cmd, timeout=300)
+
+
+def _cleanup_partial_chromium_cache(install_output: str, cache_dir: Path) -> bool:
+    targets: set[Path] = set()
+    chromium_cache_dir = cache_dir / "chromium"
+
+    missing_dir_match = re.search(
+        r"browser folder \(([^)]+)\) exists but the executable", install_output
+    )
+    if missing_dir_match:
+        targets.add(Path(missing_dir_match.group(1)))
+
+    missing_zip_match = re.search(r"open '([^']+\.zip)'", install_output)
+    if missing_zip_match:
+        targets.add(Path(missing_zip_match.group(1)))
+
+    build_id_match = re.search(
+        r"All providers failed for chromium (\d+)", install_output
+    )
+    if build_id_match and chromium_cache_dir.exists():
+        build_id = build_id_match.group(1)
+        targets.update(chromium_cache_dir.glob(f"*{build_id}*"))
+
+    removed_any = False
+    for target in targets:
+        resolved_target = target.resolve(strict=False)
+        resolved_cache = cache_dir.resolve(strict=False)
+        if not (
+            resolved_target == resolved_cache
+            or resolved_cache in resolved_target.parents
+        ):
+            continue
+        if target.is_dir():
+            shutil.rmtree(target, ignore_errors=True)
+            removed_any = True
+            continue
+        if target.exists():
+            target.unlink(missing_ok=True)
+            removed_any = True
+
+    return removed_any
+
+
+def _emit_chromium_binary_record(
+    binary: Binary, machine_id: str, binary_id: str
+) -> None:
     record = {
-        'type': 'Binary',
-        'name': 'chromium',
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'puppeteer',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
+        "type": "Binary",
+        "name": "chromium",
+        "abspath": str(binary.abspath),
+        "version": str(binary.version) if binary.version else "",
+        "sha256": binary.sha256 or "",
+        "binprovider": "puppeteer",
+        "machine_id": machine_id,
+        "binary_id": binary_id,
     }
     print(json.dumps(record))
 
 
+def _load_binary_from_path(path: str) -> Binary | None:
+    try:
+        binary = Binary(
+            name="chromium",
+            binproviders=[EnvProvider()],
+            overrides={"env": {"abspath": str(path)}},
+        ).load()
+    except Exception:
+        return None
+    if binary and binary.abspath:
+        return binary
+    return None
+
+
 def _load_chromium_binary(output: str) -> Binary | None:
     candidates: list[Path] = []
-    match = re.search(r'(?:chromium|chrome)@[^\s]+\s+(\S+)', output)
+    match = re.search(r"(?:chromium|chrome)@[^\s]+\s+(\S+)", output)
     if match:
         candidates.append(Path(match.group(1)))
 
     cache_dirs: list[Path] = []
-    cache_env = os.environ.get('PUPPETEER_CACHE_DIR')
+    cache_env = os.environ.get("PUPPETEER_CACHE_DIR")
     if cache_env:
         cache_dirs.append(Path(cache_env))
 
     home = Path.home()
-    cache_dirs.extend([
-        home / '.cache' / 'puppeteer',
-        home / 'Library' / 'Caches' / 'puppeteer',
-    ])
+    cache_dirs.extend(
+        [
+            home / ".cache" / "puppeteer",
+            home / "Library" / "Caches" / "puppeteer",
+        ]
+    )
 
     for base in cache_dirs:
-        for root in (base, base / 'chromium', base / 'chrome'):
+        for root in (base, base / "chromium", base / "chrome"):
             try:
-                candidates.extend(root.rglob('Chromium.app/Contents/MacOS/Chromium'))
+                candidates.extend(root.rglob("Chromium.app/Contents/MacOS/Chromium"))
             except Exception:
                 pass
             try:
-                candidates.extend(root.rglob('chrome'))
+                candidates.extend(root.rglob("chrome"))
             except Exception:
                 pass
 
     for candidate in candidates:
         try:
             binary = Binary(
-                name='chromium',
+                name="chromium",
                 binproviders=[EnvProvider()],
-                overrides={'env': {'abspath': str(candidate)}},
+                overrides={"env": {"abspath": str(candidate)}},
             ).load()
         except Exception:
             continue
@@ -172,5 +281,5 @@ def _load_chromium_binary(output: str) -> Binary | None:
     return None
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/puppeteer/on_Crawl__60_puppeteer_install.py b/abx_plugins/plugins/puppeteer/on_Crawl__60_puppeteer_install.py
index 47570b2..3a5a4e3 100755
--- a/abx_plugins/plugins/puppeteer/on_Crawl__60_puppeteer_install.py
+++ b/abx_plugins/plugins/puppeteer/on_Crawl__60_puppeteer_install.py
@@ -14,24 +14,29 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
 def main() -> None:
-    enabled = os.environ.get('PUPPETEER_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off')
+    enabled = os.environ.get("PUPPETEER_ENABLED", "true").lower() not in (
+        "false",
+        "0",
+        "no",
+        "off",
+    )
     if not enabled:
         sys.exit(0)
 
     record = {
-        'type': 'Binary',
-        'name': 'puppeteer',
-        'binproviders': 'npm,env',
-        'overrides': {
-            'npm': {
-                'packages': ['puppeteer'],
+        "type": "Binary",
+        "name": "puppeteer",
+        "binproviders": "npm,env",
+        "overrides": {
+            "npm": {
+                "packages": ["puppeteer"],
             }
         },
     }
@@ -39,5 +44,5 @@ def main() -> None:
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/puppeteer/tests/test_puppeteer.py b/abx_plugins/plugins/puppeteer/tests/test_puppeteer.py
index 00077d6..a9e22d3 100644
--- a/abx_plugins/plugins/puppeteer/tests/test_puppeteer.py
+++ b/abx_plugins/plugins/puppeteer/tests/test_puppeteer.py
@@ -8,7 +8,6 @@
 import tempfile
 from pathlib import Path
 
-import pytest
 
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_plugin_dir,
@@ -17,9 +16,9 @@
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-CRAWL_HOOK = get_hook_script(PLUGIN_DIR, 'on_Crawl__*_puppeteer_install.py')
-BINARY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Binary__*_puppeteer_install.py')
-NPM_BINARY_HOOK = PLUGIN_DIR.parent / 'npm' / 'on_Binary__10_npm_install.py'
+CRAWL_HOOK = get_hook_script(PLUGIN_DIR, "on_Crawl__*_puppeteer_install.py")
+BINARY_HOOK = get_hook_script(PLUGIN_DIR, "on_Binary__*_puppeteer_install.py")
+NPM_BINARY_HOOK = PLUGIN_DIR.parent / "npm" / "on_Binary__10_npm_install.py"
 
 
 def test_hook_scripts_exist():
@@ -40,20 +39,30 @@ def test_crawl_hook_emits_puppeteer_binary():
         )
 
         assert result.returncode == 0, f"crawl hook failed: {result.stderr}"
-        records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')]
-        binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer']
+        records = [
+            json.loads(line)
+            for line in result.stdout.splitlines()
+            if line.strip().startswith("{")
+        ]
+        binaries = [
+            r
+            for r in records
+            if r.get("type") == "Binary" and r.get("name") == "puppeteer"
+        ]
         assert binaries, f"Expected Binary record for puppeteer, got: {records}"
-        assert 'npm' in binaries[0].get('binproviders', ''), "puppeteer should be installable via npm provider"
+        assert "npm" in binaries[0].get("binproviders", ""), (
+            "puppeteer should be installable via npm provider"
+        )
 
 
 def test_puppeteer_installs_chromium():
-    assert shutil.which('npm'), "npm is required for puppeteer installation"
+    assert shutil.which("npm"), "npm is required for puppeteer installation"
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
         env = os.environ.copy()
-        env['HOME'] = str(tmpdir)
-        env.pop('LIB_DIR', None)
+        env["HOME"] = str(tmpdir)
+        env.pop("LIB_DIR", None)
 
         crawl_result = subprocess.run(
             [sys.executable, str(CRAWL_HOOK)],
@@ -64,22 +73,32 @@ def test_puppeteer_installs_chromium():
             timeout=30,
         )
         assert crawl_result.returncode == 0, f"crawl hook failed: {crawl_result.stderr}"
-        crawl_records = [json.loads(line) for line in crawl_result.stdout.splitlines() if line.strip().startswith('{')]
+        crawl_records = [
+            json.loads(line)
+            for line in crawl_result.stdout.splitlines()
+            if line.strip().startswith("{")
+        ]
         puppeteer_record = next(
-            (r for r in crawl_records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer'),
+            (
+                r
+                for r in crawl_records
+                if r.get("type") == "Binary" and r.get("name") == "puppeteer"
+            ),
             None,
         )
-        assert puppeteer_record, f"Expected puppeteer Binary record, got: {crawl_records}"
+        assert puppeteer_record, (
+            f"Expected puppeteer Binary record, got: {crawl_records}"
+        )
 
         npm_result = subprocess.run(
             [
                 sys.executable,
                 str(NPM_BINARY_HOOK),
-                '--machine-id=test-machine',
-                '--binary-id=test-puppeteer',
-                '--name=puppeteer',
+                "--machine-id=test-machine",
+                "--binary-id=test-puppeteer",
+                "--name=puppeteer",
                 f"--binproviders={puppeteer_record.get('binproviders', '*')}",
-                '--overrides=' + json.dumps(puppeteer_record.get('overrides') or {}),
+                "--overrides=" + json.dumps(puppeteer_record.get("overrides") or {}),
             ],
             cwd=tmpdir,
             capture_output=True,
@@ -97,11 +116,12 @@ def test_puppeteer_installs_chromium():
             [
                 sys.executable,
                 str(BINARY_HOOK),
-                '--machine-id=test-machine',
-                '--binary-id=test-binary',
-                '--name=chromium',
-                '--binproviders=puppeteer',
-                '--overrides=' + json.dumps({'puppeteer': ['chromium@latest', '--install-deps']}),
+                "--machine-id=test-machine",
+                "--binary-id=test-binary",
+                "--name=chromium",
+                "--binproviders=puppeteer",
+                "--overrides="
+                + json.dumps({"puppeteer": ["chromium@latest", "--install-deps"]}),
             ],
             cwd=tmpdir,
             capture_output=True,
@@ -116,8 +136,18 @@ def test_puppeteer_installs_chromium():
             f"stderr:\n{result.stderr}"
         )
 
-        records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')]
-        binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'chromium']
+        records = [
+            json.loads(line)
+            for line in result.stdout.splitlines()
+            if line.strip().startswith("{")
+        ]
+        binaries = [
+            r
+            for r in records
+            if r.get("type") == "Binary" and r.get("name") == "chromium"
+        ]
         assert binaries, f"Expected Binary record for chromium, got: {records}"
-        abspath = binaries[0].get('abspath')
-        assert abspath and Path(abspath).exists(), f"Chromium binary path invalid: {abspath}"
+        abspath = binaries[0].get("abspath")
+        assert abspath and Path(abspath).exists(), (
+            f"Chromium binary path invalid: {abspath}"
+        )
diff --git a/abx_plugins/plugins/readability/on_Crawl__35_readability_install.py b/abx_plugins/plugins/readability/on_Crawl__35_readability_install.py
index 7ec6bc5..078988e 100755
--- a/abx_plugins/plugins/readability/on_Crawl__35_readability_install.py
+++ b/abx_plugins/plugins/readability/on_Crawl__35_readability_install.py
@@ -12,52 +12,53 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'overrides': {
-            'npm': {
-                'packages': ['https://github.com/ArchiveBox/readability-extractor'],
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "overrides": {
+            "npm": {
+                "packages": ["https://github.com/ArchiveBox/readability-extractor"],
             },
         },
-        'machine_id': machine_id,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
 
 def main():
-    readability_enabled = get_env_bool('READABILITY_ENABLED', True)
+    readability_enabled = get_env_bool("READABILITY_ENABLED", True)
 
     if not readability_enabled:
         sys.exit(0)
 
-    output_binary(name='readability-extractor', binproviders='npm,env')
+    output_binary(name="readability-extractor", binproviders="npm,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/readability/on_Snapshot__56_readability.py b/abx_plugins/plugins/readability/on_Snapshot__56_readability.py
index d69b8c4..04ac634 100755
--- a/abx_plugins/plugins/readability/on_Snapshot__56_readability.py
+++ b/abx_plugins/plugins/readability/on_Snapshot__56_readability.py
@@ -26,7 +26,6 @@
 import os
 import subprocess
 import sys
-import tempfile
 from pathlib import Path
 from urllib.parse import urlparse
 
@@ -34,18 +33,18 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'readability'
-BIN_NAME = 'readability-extractor'
-BIN_PROVIDERS = 'npm,env'
+PLUGIN_NAME = "readability"
+BIN_NAME = "readability-extractor"
+BIN_PROVIDERS = "npm,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-OUTPUT_FILE = 'content.html'
+OUTPUT_FILE = "content.html"
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
@@ -58,7 +57,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -74,18 +73,18 @@ def find_html_source() -> str | None:
     """Find HTML content from other extractors in the snapshot directory."""
     # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories
     search_patterns = [
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
+        "singlefile/singlefile.html",
+        "*_singlefile/singlefile.html",
+        "singlefile/*.html",
+        "*_singlefile/*.html",
+        "dom/output.html",
+        "*_dom/output.html",
+        "dom/*.html",
+        "*_dom/*.html",
+        "wget/**/*.html",
+        "*_wget/**/*.html",
+        "wget/**/*.htm",
+        "*_wget/**/*.htm",
     ]
 
     for base in (Path.cwd(), Path.cwd().parent):
@@ -104,14 +103,14 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
 
     Returns: (success, output_path, error_message)
     """
-    timeout = get_env_int('READABILITY_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    readability_args = get_env_array('READABILITY_ARGS', [])
-    readability_args_extra = get_env_array('READABILITY_ARGS_EXTRA', [])
+    timeout = get_env_int("READABILITY_TIMEOUT") or get_env_int("TIMEOUT", 60)
+    readability_args = get_env_array("READABILITY_ARGS", [])
+    readability_args_extra = get_env_array("READABILITY_ARGS_EXTRA", [])
 
     # Find HTML source
     html_source = find_html_source()
     if not html_source:
-        return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
+        return False, None, "No HTML source found (run singlefile, dom, or wget first)"
 
     # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
@@ -126,32 +125,42 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
             sys.stderr.flush()
 
         if result.returncode != 0:
-            return False, None, f'readability-extractor failed (exit={result.returncode})'
+            return (
+                False,
+                None,
+                f"readability-extractor failed (exit={result.returncode})",
+            )
 
         # Parse JSON output
         try:
             result_json = json.loads(result.stdout)
         except json.JSONDecodeError:
-            return False, None, 'readability-extractor returned invalid JSON'
+            return False, None, "readability-extractor returned invalid JSON"
 
         # Extract and save content
         # readability-extractor uses camelCase field names (textContent, content)
-        text_content = result_json.pop('textContent', result_json.pop('text-content', ''))
-        html_content = result_json.pop('content', result_json.pop('html-content', ''))
+        text_content = result_json.pop(
+            "textContent", result_json.pop("text-content", "")
+        )
+        html_content = result_json.pop("content", result_json.pop("html-content", ""))
 
         if not text_content and not html_content:
-            return False, None, 'No content extracted'
+            return False, None, "No content extracted"
 
-        (output_dir / OUTPUT_FILE).write_text(html_content, encoding='utf-8')
-        (output_dir / 'content.txt').write_text(text_content, encoding='utf-8')
-        (output_dir / 'article.json').write_text(json.dumps(result_json, indent=2), encoding='utf-8')
+        (output_dir / OUTPUT_FILE).write_text(html_content, encoding="utf-8")
+        (output_dir / "content.txt").write_text(text_content, encoding="utf-8")
+        (output_dir / "article.json").write_text(
+            json.dumps(result_json, indent=2), encoding="utf-8"
+        )
 
         # Link images/ to responses capture (if available)
         try:
-            hostname = urlparse(url).hostname or ''
+            hostname = urlparse(url).hostname or ""
             if hostname:
-                responses_images = (output_dir / '..' / 'responses' / 'image' / hostname / 'images').resolve()
-                link_path = output_dir / 'images'
+                responses_images = (
+                    output_dir / ".." / "responses" / "image" / hostname / "images"
+                ).resolve()
+                link_path = output_dir / "images"
                 if responses_images.exists() and responses_images.is_dir():
                     if link_path.exists() or link_path.is_symlink():
                         if link_path.is_symlink() or link_path.is_file():
@@ -159,28 +168,30 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
                         else:
                             responses_images = None
                     if responses_images:
-                        rel_target = os.path.relpath(str(responses_images), str(output_dir))
+                        rel_target = os.path.relpath(
+                            str(responses_images), str(output_dir)
+                        )
                         link_path.symlink_to(rel_target)
         except Exception:
             pass
 
-        return True, OUTPUT_FILE, ''
+        return True, OUTPUT_FILE, ""
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to extract article from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to extract article from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Extract article content using Mozilla's Readability."""
 
     try:
         # Get binary from environment
-        binary = get_env('READABILITY_BINARY', 'readability-extractor')
+        binary = get_env("READABILITY_BINARY", "readability-extractor")
 
         # Run extraction
         success, output, error = extract_readability(url, binary)
@@ -188,22 +199,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/readability/tests/test_readability.py b/abx_plugins/plugins/readability/tests/test_readability.py
index af58dc4..e0b81b3 100644
--- a/abx_plugins/plugins/readability/tests/test_readability.py
+++ b/abx_plugins/plugins/readability/tests/test_readability.py
@@ -9,10 +9,11 @@
 """
 
 import json
-import shutil
+import os
 import subprocess
 import sys
 import tempfile
+import uuid
 from pathlib import Path
 
 import pytest
@@ -20,22 +21,29 @@
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_plugin_dir,
     get_hook_script,
-    PLUGINS_ROOT,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-READABILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_readability.*')
-TEST_URL = 'https://example.com'
+PLUGINS_ROOT = PLUGIN_DIR.parent
+_READABILITY_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_readability.*")
+if _READABILITY_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+READABILITY_HOOK = _READABILITY_HOOK
+TEST_URL = "https://example.com"
+
+# Module-level cache for binary path
+_readability_binary_path = None
+_readability_lib_root = None
 
 
 def create_example_html(tmpdir: Path) -> Path:
     """Create sample HTML that looks like example.com with enough content for Readability."""
-    singlefile_dir = tmpdir / 'singlefile'
+    singlefile_dir = tmpdir / "singlefile"
     singlefile_dir.mkdir()
 
-    html_file = singlefile_dir / 'singlefile.html'
-    html_file.write_text('''
+    html_file = singlefile_dir / "singlefile.html"
+    html_file.write_text("""
 <!DOCTYPE html>
 <html>
 <head>
@@ -69,11 +77,129 @@ def create_example_html(tmpdir: Path) -> Path:
     </article>
 </body>
 </html>
-    ''')
+    """)
 
     return html_file
 
 
+def require_readability_binary() -> str:
+    """Return readability-extractor binary path or fail with actionable context."""
+    binary_path = get_readability_binary_path()
+    assert binary_path, (
+        "readability-extractor installation failed. Install hook should install "
+        "the binary automatically in this test environment."
+    )
+    assert Path(binary_path).is_file(), (
+        f"readability-extractor binary path invalid: {binary_path}"
+    )
+    return binary_path
+
+
+def get_readability_binary_path():
+    """Get readability-extractor path from cache or by running install hooks."""
+    global _readability_binary_path
+    if _readability_binary_path and Path(_readability_binary_path).is_file():
+        return _readability_binary_path
+
+    from abx_pkg import Binary, NpmProvider, EnvProvider
+
+    try:
+        binary = Binary(
+            name="readability-extractor",
+            binproviders=[NpmProvider(), EnvProvider()],
+            overrides={
+                "npm": {
+                    "packages": ["https://github.com/ArchiveBox/readability-extractor"]
+                }
+            },
+        ).load()
+        if binary and binary.abspath:
+            _readability_binary_path = str(binary.abspath)
+            return _readability_binary_path
+    except Exception:
+        pass
+
+    npm_hook = PLUGINS_ROOT / "npm" / "on_Binary__10_npm_install.py"
+    crawl_hook = PLUGIN_DIR / "on_Crawl__35_readability_install.py"
+    if not npm_hook.exists():
+        return None
+
+    binary_id = str(uuid.uuid4())
+    machine_id = str(uuid.uuid4())
+    binproviders = "*"
+    overrides = None
+
+    if crawl_hook.exists():
+        crawl_result = subprocess.run(
+            [sys.executable, str(crawl_hook)],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        for line in crawl_result.stdout.strip().split("\n"):
+            if not line.strip().startswith("{"):
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if (
+                record.get("type") == "Binary"
+                and record.get("name") == "readability-extractor"
+            ):
+                binproviders = record.get("binproviders", "*")
+                overrides = record.get("overrides")
+                break
+
+    global _readability_lib_root
+    if not _readability_lib_root:
+        _readability_lib_root = tempfile.mkdtemp(prefix="readability-lib-")
+
+    env = os.environ.copy()
+    env["HOME"] = str(_readability_lib_root)
+    env["SNAP_DIR"] = str(Path(_readability_lib_root) / "data")
+    env["CRAWL_DIR"] = str(Path(_readability_lib_root) / "crawl")
+    env.pop("LIB_DIR", None)
+
+    cmd = [
+        sys.executable,
+        str(npm_hook),
+        "--binary-id",
+        binary_id,
+        "--machine-id",
+        machine_id,
+        "--name",
+        "readability-extractor",
+        f"--binproviders={binproviders}",
+    ]
+    if overrides:
+        cmd.append(f"--overrides={json.dumps(overrides)}")
+
+    install_result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=300,
+        env=env,
+    )
+
+    for line in install_result.stdout.strip().split("\n"):
+        if not line.strip().startswith("{"):
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if (
+            record.get("type") == "Binary"
+            and record.get("name") == "readability-extractor"
+        ):
+            _readability_binary_path = record.get("abspath")
+            return _readability_binary_path
+
+    return None
+
+
 def test_hook_script_exists():
     """Verify hook script exists."""
     assert READABILITY_HOOK.exists(), f"Hook script not found: {READABILITY_HOOK}"
@@ -83,60 +209,65 @@ def test_reports_missing_dependency_when_not_installed():
     """Test that script reports DEPENDENCY_NEEDED when readability-extractor is not found."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
 
         # Create HTML source so it doesn't fail on missing HTML
         create_example_html(snap_dir)
 
         # Run with empty PATH so binary won't be found
-        env = {'PATH': '/nonexistent', 'HOME': str(tmpdir), 'SNAP_DIR': str(snap_dir)}
+        env = {"PATH": "/nonexistent", "HOME": str(tmpdir), "SNAP_DIR": str(snap_dir)}
 
         result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
+            [
+                sys.executable,
+                str(READABILITY_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test123",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            env=env
+            env=env,
         )
 
         # Missing binary is a transient error - should exit 1 with no JSONL
         assert result.returncode == 1, "Should exit 1 when dependency missing"
 
         # Should NOT emit JSONL (transient error - will be retried)
-        jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                      if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, "Should not emit JSONL for transient error (missing binary)"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            "Should not emit JSONL for transient error (missing binary)"
+        )
 
         # Should log error to stderr
-        assert 'readability-extractor' in result.stderr.lower() or 'error' in result.stderr.lower(), \
-            "Should report error in stderr"
+        assert (
+            "readability-extractor" in result.stderr.lower()
+            or "error" in result.stderr.lower()
+        ), "Should report error in stderr"
 
 
 def test_verify_deps_with_abx_pkg():
-    """Verify readability-extractor is available via abx-pkg."""
-    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
-
-    readability_binary = Binary(
-        name='readability-extractor',
-        binproviders=[NpmProvider(), EnvProvider()],
-        overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
+    """Verify readability-extractor is installed by real plugin install hooks."""
+    binary_path = require_readability_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
     )
-    readability_loaded = readability_binary.load()
-
-    if readability_loaded and readability_loaded.abspath:
-        assert True, "readability-extractor is available"
-    else:
-        pass
 
 
 def test_extracts_article_after_installation():
     """Test full workflow: extract article using readability-extractor from real HTML."""
-    # Prerequisites checked by earlier test (install hook should have run)
+    binary_path = require_readability_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
 
         # Create example.com HTML for readability to process
@@ -144,39 +275,47 @@ def test_extracts_article_after_installation():
 
         # Run readability extraction (should find the binary)
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SNAP_DIR"] = str(snap_dir)
+        env["READABILITY_BINARY"] = binary_path
         result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(READABILITY_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify output files exist (hook writes to current directory)
-        html_file = snap_dir / 'readability' / 'content.html'
-        txt_file = snap_dir / 'readability' / 'content.txt'
-        json_file = snap_dir / 'readability' / 'article.json'
+        html_file = snap_dir / "readability" / "content.html"
+        txt_file = snap_dir / "readability" / "content.txt"
+        json_file = snap_dir / "readability" / "article.json"
 
         assert html_file.exists(), "content.html not created"
         assert txt_file.exists(), "content.txt not created"
@@ -184,17 +323,24 @@ def test_extracts_article_after_installation():
 
         # Verify HTML content contains REAL example.com text
         html_content = html_file.read_text()
-        assert len(html_content) > 100, f"HTML content too short: {len(html_content)} bytes"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('illustrative examples' in html_content.lower() or
-                'use in' in html_content.lower() or
-                'literature' in html_content.lower()), \
-            "Missing example.com description in HTML"
+        assert len(html_content) > 100, (
+            f"HTML content too short: {len(html_content)} bytes"
+        )
+        assert "example domain" in html_content.lower(), (
+            "Missing 'Example Domain' in HTML"
+        )
+        assert (
+            "illustrative examples" in html_content.lower()
+            or "use in" in html_content.lower()
+            or "literature" in html_content.lower()
+        ), "Missing example.com description in HTML"
 
         # Verify text content contains REAL example.com text
         txt_content = txt_file.read_text()
-        assert len(txt_content) > 50, f"Text content too short: {len(txt_content)} bytes"
-        assert 'example' in txt_content.lower(), "Missing 'example' in text"
+        assert len(txt_content) > 50, (
+            f"Text content too short: {len(txt_content)} bytes"
+        )
+        assert "example" in txt_content.lower(), "Missing 'example' in text"
 
         # Verify JSON metadata
         json_data = json.loads(json_file.read_text())
@@ -203,33 +349,42 @@ def test_extracts_article_after_installation():
 
 def test_fails_gracefully_without_html_source():
     """Test that extraction fails gracefully when no HTML source is available."""
-    # Prerequisites checked by earlier test (install hook should have run)
+    binary_path = require_readability_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
 
         # Don't create any HTML source files
 
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SNAP_DIR"] = str(snap_dir)
+        env["READABILITY_BINARY"] = binary_path
         result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(READABILITY_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         assert result.returncode != 0, "Should fail without HTML source"
         combined_output = result.stdout + result.stderr
-        assert ('no html source' in combined_output.lower() or
-                'not found' in combined_output.lower() or
-                'ERROR=' in combined_output), \
-            "Should report missing HTML source"
+        assert (
+            "no html source" in combined_output.lower()
+            or "not found" in combined_output.lower()
+            or "ERROR=" in combined_output
+        ), "Should report missing HTML source"
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/redirects/tests/test_redirects.py b/abx_plugins/plugins/redirects/tests/test_redirects.py
index 4424c18..98570e9 100644
--- a/abx_plugins/plugins/redirects/tests/test_redirects.py
+++ b/abx_plugins/plugins/redirects/tests/test_redirects.py
@@ -14,18 +14,19 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
-    get_test_env,
+    CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_urls,
 )
 
 
 def chrome_available() -> bool:
     """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
+    for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
         if shutil.which(name):
             return True
     return False
@@ -33,7 +34,7 @@ def chrome_available() -> bool:
 
 # Get the path to the redirects hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-REDIRECTS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_redirects.*')
+REDIRECTS_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_redirects.*")
 
 
 class TestRedirectsPlugin:
@@ -41,7 +42,9 @@ class TestRedirectsPlugin:
 
     def test_redirects_hook_exists(self):
         """Redirects hook script should exist."""
-        assert REDIRECTS_HOOK is not None, "Redirects hook not found in plugin directory"
+        assert REDIRECTS_HOOK is not None, (
+            "Redirects hook not found in plugin directory"
+        )
         assert REDIRECTS_HOOK.exists(), f"Hook not found: {REDIRECTS_HOOK}"
 
 
@@ -57,75 +60,67 @@ def teardown_method(self, _method=None):
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_redirects_captures_navigation(self, chrome_test_urls):
-        """Redirects hook should capture URL navigation without errors."""
-        test_url = chrome_test_urls['redirect_url']
-        snapshot_id = 'test-redirects-snapshot'
+        """Redirects hook should capture redirect-chain records from navigation."""
+        test_url = chrome_test_urls["redirect_url"]
+        snapshot_id = "test-redirects-snapshot"
 
         try:
             with chrome_session(
                 self.temp_dir,
-                crawl_id='test-redirects-crawl',
+                crawl_id="test-redirects-crawl",
                 snapshot_id=snapshot_id,
                 test_url=test_url,
-                navigate=True,
+                navigate=False,
                 timeout=30,
             ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
                 # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
 
-
                 # Run redirects hook with the active Chrome session (background hook)
                 result = subprocess.Popen(
-                    ['node', str(REDIRECTS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(REDIRECTS_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(snapshot_chrome_dir),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
-                    env=env
+                    env=env,
                 )
 
-                # Check for output file
-                snap_dir = Path(env['SNAP_DIR'])
-                redirects_output = snap_dir / 'redirects' / 'redirects.jsonl'
+                nav_result = subprocess.run(
+                    [
+                        "node",
+                        str(CHROME_NAVIGATE_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
+                    cwd=str(snapshot_chrome_dir),
+                    capture_output=True,
+                    text=True,
+                    timeout=120,
+                    env=env,
+                )
+                assert nav_result.returncode == 0, (
+                    f"Navigation failed: {nav_result.stderr}\nStdout: {nav_result.stdout}"
+                )
 
-                redirects_data = None
+                # Check for output file
+                snap_dir = Path(env["SNAP_DIR"])
+                redirects_output = snap_dir / "redirects" / "redirects.jsonl"
 
                 # Wait briefly for background hook to write output
-                for _ in range(10):
-                    if redirects_output.exists() and redirects_output.stat().st_size > 0:
+                for _ in range(30):
+                    if (
+                        redirects_output.exists()
+                        and redirects_output.stat().st_size > 0
+                    ):
                         break
                     time.sleep(1)
 
-                # Try parsing from file first
-                if redirects_output.exists():
-                    with open(redirects_output) as f:
-                        for line in f:
-                            line = line.strip()
-                            if line.startswith('{'):
-                                try:
-                                    redirects_data = json.loads(line)
-                                    break
-                                except json.JSONDecodeError:
-                                    continue
-
-                # Try parsing from stdout if not in file
-                if not redirects_data:
-                    try:
-                        stdout, stderr = result.communicate(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        stdout, stderr = "", ""
-                    for line in stdout.split('\n'):
-                        line = line.strip()
-                        if line.startswith('{'):
-                            try:
-                                record = json.loads(line)
-                                if 'chain' in record or 'redirects' in record or record.get('type') == 'Redirects':
-                                    redirects_data = record
-                                    break
-                            except json.JSONDecodeError:
-                                continue
-
                 # Verify hook ran successfully
-                # example.com typically doesn't redirect, so we just verify no errors
                 if result.poll() is None:
                     result.terminate()
                     try:
@@ -135,12 +130,57 @@ def test_redirects_captures_navigation(self, chrome_test_urls):
                         stdout, stderr = result.communicate()
                 else:
                     stdout, stderr = result.communicate()
-                assert 'Traceback' not in stderr
-                assert 'Error:' not in stderr
+                assert "Traceback" not in stderr
+                assert "Error:" not in stderr
+
+                assert redirects_output.exists(), (
+                    f"redirects.jsonl not created in {redirects_output.parent}"
+                )
+                content = redirects_output.read_text().strip()
+                assert content, "redirects.jsonl should not be empty"
+
+                redirects_records = []
+                for line in content.split("\n"):
+                    line = line.strip()
+                    if not line.startswith("{"):
+                        continue
+                    try:
+                        redirects_records.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        continue
+
+                assert redirects_records, "No redirect records captured"
+                assert any(record.get("to_url") for record in redirects_records), (
+                    f"Redirect records missing to_url: {redirects_records}"
+                )
+                assert any(
+                    record.get("type") == "http"
+                    and str(record.get("status")) in {"301", "302", "303", "307", "308"}
+                    for record in redirects_records
+                ), f"No HTTP redirect captured: {redirects_records}"
+
+                archive_result = None
+                for line in stdout.split("\n"):
+                    line = line.strip()
+                    if not line.startswith("{"):
+                        continue
+                    try:
+                        record = json.loads(line)
+                    except json.JSONDecodeError:
+                        continue
+                    if record.get("type") == "ArchiveResult":
+                        archive_result = record
+                        break
+                assert archive_result is not None, (
+                    "Missing ArchiveResult from redirects hook"
+                )
+                assert archive_result.get("status") == "succeeded", (
+                    f"Redirects hook did not report success: {archive_result}"
+                )
 
         except RuntimeError:
             raise
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/responses/tests/test_responses.py b/abx_plugins/plugins/responses/tests/test_responses.py
index 55822fa..635420d 100644
--- a/abx_plugins/plugins/responses/tests/test_responses.py
+++ b/abx_plugins/plugins/responses/tests/test_responses.py
@@ -14,18 +14,19 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
 )
 
 
 # Get the path to the responses hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-RESPONSES_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_responses.*')
+RESPONSES_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_responses.*")
 
 
 class TestResponsesPlugin:
@@ -33,7 +34,9 @@ class TestResponsesPlugin:
 
     def test_responses_hook_exists(self):
         """Responses hook script should exist."""
-        assert RESPONSES_HOOK is not None, "Responses hook not found in plugin directory"
+        assert RESPONSES_HOOK is not None, (
+            "Responses hook not found in plugin directory"
+        )
         assert RESPONSES_HOOK.exists(), f"Hook not found: {RESPONSES_HOOK}"
 
 
@@ -51,41 +54,51 @@ def teardown_method(self, _method=None):
     def test_responses_captures_network_responses(self, chrome_test_url):
         """Responses hook should capture network responses from page load."""
         test_url = chrome_test_url
-        snapshot_id = 'test-responses-snapshot'
+        snapshot_id = "test-responses-snapshot"
 
         with chrome_session(
             self.temp_dir,
-            crawl_id='test-responses-crawl',
+            crawl_id="test-responses-crawl",
             snapshot_id=snapshot_id,
             test_url=test_url,
             navigate=False,
             timeout=30,
         ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            responses_dir = snapshot_chrome_dir.parent / 'responses'
+            responses_dir = snapshot_chrome_dir.parent / "responses"
             responses_dir.mkdir(exist_ok=True)
 
             # Run responses hook with the active Chrome session (background hook)
             result = subprocess.Popen(
-                ['node', str(RESPONSES_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(RESPONSES_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(responses_dir),
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-                env=env
+                env=env,
             )
 
             nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(CHROME_NAVIGATE_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
             # Check for output directory and index file
-            index_output = responses_dir / 'index.jsonl'
+            index_output = responses_dir / "index.jsonl"
 
             # Wait briefly for background hook to write output
             for _ in range(30):
@@ -103,23 +116,23 @@ def test_responses_captures_network_responses(self, chrome_test_url):
                     stdout, stderr = result.communicate()
             else:
                 stdout, stderr = result.communicate()
-            assert 'Traceback' not in stderr
+            assert "Traceback" not in stderr
 
             # If index file exists, verify it's valid JSONL
             if index_output.exists():
                 with open(index_output) as f:
                     content = f.read().strip()
                     assert content, "Responses output should not be empty"
-                    for line in content.split('\n'):
+                    for line in content.split("\n"):
                         if line.strip():
                             try:
                                 record = json.loads(line)
                                 # Verify structure
-                                assert 'url' in record
-                                assert 'resourceType' in record
+                                assert "url" in record
+                                assert "resourceType" in record
                             except json.JSONDecodeError:
                                 pass  # Some lines may be incomplete
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/screenshot/on_Snapshot__51_screenshot.js b/abx_plugins/plugins/screenshot/on_Snapshot__51_screenshot.js
index 5e76e46..6bb278e 100644
--- a/abx_plugins/plugins/screenshot/on_Snapshot__51_screenshot.js
+++ b/abx_plugins/plugins/screenshot/on_Snapshot__51_screenshot.js
@@ -85,14 +85,6 @@ async function takeScreenshot(url) {
     // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
-    // Wait for chrome_navigate to complete (writes navigation.json)
-    const timeoutSeconds = parseInt(getEnv('SCREENSHOT_TIMEOUT', '10'), 10);
-    const timeoutMs = timeoutSeconds * 1000;
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    if (!fs.existsSync(navigationFile)) {
-        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs);
-    }
-
     const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
     const targetFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
     if (!fs.existsSync(cdpFile)) {
@@ -101,6 +93,15 @@ async function takeScreenshot(url) {
     if (!fs.existsSync(targetFile)) {
         throw new Error('No target_id.txt found (chrome_tab must run first)');
     }
+
+    // Wait for chrome_navigate to complete (writes navigation.json)
+    // Keep runtime default aligned with config.json (default: 60s).
+    const timeoutSeconds = parseInt(getEnv('SCREENSHOT_TIMEOUT', '60'), 10);
+    const timeoutMs = timeoutSeconds * 1000;
+    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
+    if (!fs.existsSync(navigationFile)) {
+        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs);
+    }
     const cdpUrl = fs.readFileSync(cdpFile, 'utf8').trim();
     if (!cdpUrl.startsWith('ws://') && !cdpUrl.startsWith('wss://')) {
         throw new Error('Invalid CDP URL in cdp_url.txt');
@@ -128,10 +129,19 @@ async function takeScreenshot(url) {
         });
 
         await page.bringToFront();
-        await Promise.race([
-            page.screenshot({ path: outputPath, fullPage: true }),
-            timeoutPromise,
-        ]);
+        try {
+            await Promise.race([
+                page.screenshot({ path: outputPath, fullPage: true }),
+                timeoutPromise,
+            ]);
+        } catch (err) {
+            if (!(err instanceof Error) || !err.message.includes('timed out')) {
+                throw err;
+            }
+            // Some Chromium builds hang on full-page capture against local fixture pages.
+            // Fall back to viewport capture before failing the hook.
+            await page.screenshot({ path: outputPath, fullPage: false });
+        }
 
         return outputPath;
 
diff --git a/abx_plugins/plugins/screenshot/tests/test_screenshot.py b/abx_plugins/plugins/screenshot/tests/test_screenshot.py
index 3952a8e..d67acb1 100644
--- a/abx_plugins/plugins/screenshot/tests/test_screenshot.py
+++ b/abx_plugins/plugins/screenshot/tests/test_screenshot.py
@@ -14,34 +14,46 @@
 import json
 import os
 import subprocess
-import sys
 import tempfile
 from pathlib import Path
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
     get_plugin_dir,
     get_hook_script,
-    run_hook_and_parse,
     chrome_session,
-    ensure_chromium_and_puppeteer_installed,
-    chrome_test_url,
-    LIB_DIR,
-    NODE_MODULES_DIR,
     CHROME_PLUGIN_DIR,
 )
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
+_SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_screenshot.*")
+if _SCREENSHOT_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+SCREENSHOT_HOOK = _SCREENSHOT_HOOK
 
 # Get Chrome hooks for setting up sessions
-CHROME_LAUNCH_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
-CHROME_TAB_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Snapshot__*_chrome_tab.*')
-CHROME_NAVIGATE_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Snapshot__*_chrome_navigate.*')
+_CHROME_LAUNCH_HOOK = get_hook_script(CHROME_PLUGIN_DIR, "on_Crawl__*_chrome_launch.*")
+if _CHROME_LAUNCH_HOOK is None:
+    raise FileNotFoundError(f"Chrome launch hook not found in {CHROME_PLUGIN_DIR}")
+CHROME_LAUNCH_HOOK = _CHROME_LAUNCH_HOOK
+_CHROME_TAB_HOOK = get_hook_script(CHROME_PLUGIN_DIR, "on_Snapshot__*_chrome_tab.*")
+if _CHROME_TAB_HOOK is None:
+    raise FileNotFoundError(f"Chrome tab hook not found in {CHROME_PLUGIN_DIR}")
+CHROME_TAB_HOOK = _CHROME_TAB_HOOK
+_CHROME_NAVIGATE_HOOK = get_hook_script(
+    CHROME_PLUGIN_DIR, "on_Snapshot__*_chrome_navigate.*"
+)
+if _CHROME_NAVIGATE_HOOK is None:
+    raise FileNotFoundError(f"Chrome navigate hook not found in {CHROME_PLUGIN_DIR}")
+CHROME_NAVIGATE_HOOK = _CHROME_NAVIGATE_HOOK
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
 
-@pytest.fixture(scope='module', autouse=True)
+
+@pytest.fixture(scope="module", autouse=True)
 def _ensure_chrome_prereqs(ensure_chromium_and_puppeteer_installed):
     return ensure_chromium_and_puppeteer_installed
 
@@ -53,12 +65,10 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
+    from abx_pkg import Binary, EnvProvider
 
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
     assert node_loaded and node_loaded.abspath, "Node.js required for screenshot plugin"
 
@@ -67,67 +77,94 @@ def test_screenshot_with_chrome_session(chrome_test_url):
     """Test multiple screenshot scenarios with one Chrome session to save time."""
     with tempfile.TemporaryDirectory() as tmpdir:
         test_url = chrome_test_url
-        snapshot_id = 'test-screenshot-snap'
+        snapshot_id = "test-screenshot-snap"
 
         try:
             with chrome_session(
                 Path(tmpdir),
-                crawl_id='test-screenshot-crawl',
+                crawl_id="test-screenshot-crawl",
                 snapshot_id=snapshot_id,
                 test_url=test_url,
                 navigate=True,
-                timeout=30,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-
                 # Scenario 1: Basic screenshot extraction
-                screenshot_dir = snapshot_chrome_dir.parent / 'screenshot'
+                screenshot_dir = snapshot_chrome_dir.parent / "screenshot"
                 screenshot_dir.mkdir()
 
-                result = subprocess.run(
-                    ['node', str(SCREENSHOT_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(screenshot_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=30,
-                    env=env
+                try:
+                    result = subprocess.run(
+                        [
+                            "node",
+                            str(SCREENSHOT_HOOK),
+                            f"--url={test_url}",
+                            f"--snapshot-id={snapshot_id}",
+                        ],
+                        cwd=str(screenshot_dir),
+                        capture_output=True,
+                        text=True,
+                        timeout=120,
+                        env=env,
+                    )
+                except subprocess.TimeoutExpired:
+                    pytest.fail("Screenshot capture timed out")
+
+                if (
+                    result.returncode != 0
+                    and "Screenshot capture timed out" in result.stderr
+                ):
+                    pytest.fail(f"Screenshot capture timed out: {result.stderr}")
+
+                assert result.returncode == 0, (
+                    f"Screenshot extraction failed:\nStderr: {result.stderr}"
                 )
 
-                assert result.returncode == 0, f"Screenshot extraction failed:\nStderr: {result.stderr}"
-
                 # Parse JSONL output
                 result_json = None
-                for line in result.stdout.strip().split('\n'):
+                for line in result.stdout.strip().split("\n"):
                     line = line.strip()
-                    if line.startswith('{'):
+                    if line.startswith("{"):
                         try:
                             record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
+                            if record.get("type") == "ArchiveResult":
                                 result_json = record
                                 break
                         except json.JSONDecodeError:
                             pass
 
-                assert result_json and result_json['status'] == 'succeeded'
-                screenshot_file = screenshot_dir / 'screenshot.png'
-                assert screenshot_file.exists() and screenshot_file.stat().st_size > 1000
-                assert screenshot_file.read_bytes()[:8] == b'\x89PNG\r\n\x1a\n'
+                assert result_json and result_json["status"] == "succeeded"
+                screenshot_file = screenshot_dir / "screenshot.png"
+                assert (
+                    screenshot_file.exists() and screenshot_file.stat().st_size > 1000
+                )
+                assert screenshot_file.read_bytes()[:8] == b"\x89PNG\r\n\x1a\n"
 
                 # Scenario 2: Wrong target ID (error case)
-                screenshot_dir3 = snapshot_chrome_dir.parent / 'screenshot3'
+                screenshot_dir3 = snapshot_chrome_dir.parent / "screenshot3"
                 screenshot_dir3.mkdir()
-                (snapshot_chrome_dir / 'target_id.txt').write_text('nonexistent-target-id')
+                (snapshot_chrome_dir / "target_id.txt").write_text(
+                    "nonexistent-target-id"
+                )
 
                 result = subprocess.run(
-                    ['node', str(SCREENSHOT_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(SCREENSHOT_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(screenshot_dir3),
                     capture_output=True,
                     text=True,
                     timeout=5,
-                    env=env
+                    env=env,
                 )
 
                 assert result.returncode != 0
-                assert 'target' in result.stderr.lower() and 'not found' in result.stderr.lower()
+                assert (
+                    "target" in result.stderr.lower()
+                    and "not found" in result.stderr.lower()
+                )
 
         except RuntimeError:
             raise
@@ -136,86 +173,109 @@ def test_screenshot_with_chrome_session(chrome_test_url):
 def test_skips_when_staticfile_exists(chrome_test_url):
     """Test that screenshot skips when staticfile extractor already handled the URL."""
     with tempfile.TemporaryDirectory() as tmpdir:
-        snap_dir = Path(tmpdir) / 'snap'
+        snap_dir = Path(tmpdir) / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        snapshot_dir = snap_dir / 'snap-skip'
-        screenshot_dir = snapshot_dir / 'screenshot'
+        snapshot_dir = snap_dir / "snap-skip"
+        screenshot_dir = snapshot_dir / "screenshot"
         screenshot_dir.mkdir(parents=True)
 
         # Create staticfile output to simulate staticfile extractor already ran
-        staticfile_dir = snapshot_dir / 'staticfile'
+        staticfile_dir = snapshot_dir / "staticfile"
         staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
+        (staticfile_dir / "stdout.log").write_text(
+            '{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n'
+        )
 
-        env = get_test_env() | {'SNAP_DIR': str(snapshot_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snapshot_dir)}
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=snap-skip'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=snap-skip",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         assert result.returncode == 0, f"Should exit successfully: {result.stderr}"
 
         # Should emit skipped status
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'skipped', f"Should skip: {result_json}"
+        assert result_json["status"] == "skipped", f"Should skip: {result_json}"
 
 
 def test_config_save_screenshot_false_skips(chrome_test_url):
     """Test that SCREENSHOT_ENABLED=False exits without emitting JSONL."""
-    import os
 
     # FIRST check what Python sees
-    print(f"\n[DEBUG PYTHON] NODE_V8_COVERAGE in os.environ: {'NODE_V8_COVERAGE' in os.environ}")
+    print(
+        f"\n[DEBUG PYTHON] NODE_V8_COVERAGE in os.environ: {'NODE_V8_COVERAGE' in os.environ}"
+    )
     print(f"[DEBUG PYTHON] Value: {os.environ.get('NODE_V8_COVERAGE', 'NOT SET')}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
         env = os.environ.copy()
-        env['SCREENSHOT_ENABLED'] = 'False'
-        env['SNAP_DIR'] = str(snap_dir)
+        env["SCREENSHOT_ENABLED"] = "False"
+        env["SNAP_DIR"] = str(snap_dir)
 
         # Check what's in the copied env
         print(f"[DEBUG ENV COPY] NODE_V8_COVERAGE in env: {'NODE_V8_COVERAGE' in env}")
         print(f"[DEBUG ENV COPY] Value: {env.get('NODE_V8_COVERAGE', 'NOT SET')}")
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test999'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         print(f"[DEBUG RESULT] Exit code: {result.returncode}")
         print(f"[DEBUG RESULT] Stderr: {result.stderr[:200]}")
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_reports_missing_chrome(chrome_test_url):
@@ -224,24 +284,33 @@ def test_reports_missing_chrome(chrome_test_url):
         tmpdir = Path(tmpdir)
 
         # Set CHROME_BINARY to nonexistent path
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['CHROME_BINARY'] = '/nonexistent/chrome'
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["CHROME_BINARY"] = "/nonexistent/chrome"
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test123'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test123",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should fail and report missing Chrome
         if result.returncode != 0:
             combined = result.stdout + result.stderr
-            assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
+            assert (
+                "chrome" in combined.lower()
+                or "browser" in combined.lower()
+                or "ERROR=" in combined
+            )
 
 
 def test_waits_for_navigation_timeout(chrome_test_url):
@@ -250,61 +319,74 @@ def test_waits_for_navigation_timeout(chrome_test_url):
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
 
         # Create chrome directory without navigation.json to trigger timeout
-        chrome_dir = snap_dir / 'chrome'
+        chrome_dir = snap_dir / "chrome"
         chrome_dir.mkdir(parents=True, exist_ok=True)
-        (chrome_dir / 'cdp_url.txt').write_text('ws://chrome-cdp.localhost:9222/devtools/browser/test')
-        (chrome_dir / 'target_id.txt').write_text('test-target-id')
+        (chrome_dir / "cdp_url.txt").write_text(
+            "ws://chrome-cdp.localhost:9222/devtools/browser/test"
+        )
+        (chrome_dir / "target_id.txt").write_text("test-target-id")
         # Intentionally NOT creating navigation.json to test timeout
 
-        screenshot_dir = snap_dir / 'screenshot'
+        screenshot_dir = snap_dir / "screenshot"
         screenshot_dir.mkdir()
 
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['SCREENSHOT_TIMEOUT'] = '2'  # Set 2 second timeout
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["SCREENSHOT_TIMEOUT"] = "2"  # Set 2 second timeout
 
         start_time = time.time()
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test-timeout'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test-timeout",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=5,  # Test timeout slightly higher than SCREENSHOT_TIMEOUT
-            env=env
+            env=env,
         )
         elapsed = time.time() - start_time
 
         # Should fail when navigation.json doesn't appear
         assert result.returncode != 0, "Should fail when navigation.json missing"
-        assert 'not loaded' in result.stderr.lower() or 'navigate' in result.stderr.lower(), f"Should mention navigation timeout: {result.stderr}"
+        assert (
+            "not loaded" in result.stderr.lower() or "navigate" in result.stderr.lower()
+        ), f"Should mention navigation timeout: {result.stderr}"
         # Should complete within 3s (2s wait + 1s overhead)
         assert elapsed < 3, f"Should timeout within 3s, took {elapsed:.1f}s"
 
 
 def test_config_timeout_honored(chrome_test_url):
     """Test that CHROME_TIMEOUT config is respected."""
-    import os
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
 
         # Set very short timeout
         env = os.environ.copy()
-        env['CHROME_TIMEOUT'] = '5'
-        env['SNAP_DIR'] = str(snap_dir)
+        env["CHROME_TIMEOUT"] = "5"
+        env["SNAP_DIR"] = str(snap_dir)
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=testtimeout'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should complete (success or fail, but not hang)
@@ -316,21 +398,21 @@ def test_missing_url_argument():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--snapshot-id=test-missing-url'],
+            ["node", str(SCREENSHOT_HOOK), "--snapshot-id=test-missing-url"],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should exit with error
         assert result.returncode != 0, "Should fail when URL is missing"
-        assert 'Usage:' in result.stderr or 'url' in result.stderr.lower()
+        assert "Usage:" in result.stderr or "url" in result.stderr.lower()
 
 
 def test_missing_snapshot_id_argument(chrome_test_url):
@@ -338,101 +420,118 @@ def test_missing_snapshot_id_argument(chrome_test_url):
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}'],
+            ["node", str(SCREENSHOT_HOOK), f"--url={chrome_test_url}"],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should exit with error
         assert result.returncode != 0, "Should fail when snapshot-id is missing"
-        assert 'Usage:' in result.stderr or 'snapshot' in result.stderr.lower()
+        assert "Usage:" in result.stderr or "snapshot" in result.stderr.lower()
 
 
 def test_no_cdp_url_fails(chrome_test_url):
     """Test error when chrome dir exists but no cdp_url.txt."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = snap_dir / 'chrome'
+        chrome_dir = snap_dir / "chrome"
         chrome_dir.mkdir()
         # Create target_id.txt and navigation.json but NOT cdp_url.txt
-        (chrome_dir / 'target_id.txt').write_text('test-target')
-        (chrome_dir / 'navigation.json').write_text('{}')
+        (chrome_dir / "target_id.txt").write_text("test-target")
+        (chrome_dir / "navigation.json").write_text("{}")
 
-        screenshot_dir = snap_dir / 'screenshot'
+        screenshot_dir = snap_dir / "screenshot"
         screenshot_dir.mkdir()
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=7,
-            env=get_test_env() | {'SNAP_DIR': str(snap_dir)}
+            env=get_test_env() | {"SNAP_DIR": str(snap_dir)},
         )
 
         assert result.returncode != 0
-        assert 'no chrome session' in result.stderr.lower()
+        assert "no chrome session" in result.stderr.lower()
 
 
 def test_no_target_id_fails(chrome_test_url):
     """Test error when cdp_url exists but no target_id.txt."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = snap_dir / 'chrome'
+        chrome_dir = snap_dir / "chrome"
         chrome_dir.mkdir()
         # Create cdp_url.txt and navigation.json but NOT target_id.txt
-        (chrome_dir / 'cdp_url.txt').write_text('ws://chrome-cdp.localhost:9222/devtools/browser/test')
-        (chrome_dir / 'navigation.json').write_text('{}')
+        (chrome_dir / "cdp_url.txt").write_text(
+            "ws://chrome-cdp.localhost:9222/devtools/browser/test"
+        )
+        (chrome_dir / "navigation.json").write_text("{}")
 
-        screenshot_dir = snap_dir / 'screenshot'
+        screenshot_dir = snap_dir / "screenshot"
         screenshot_dir.mkdir()
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=7,
-            env=get_test_env() | {'SNAP_DIR': str(snap_dir)}
+            env=get_test_env() | {"SNAP_DIR": str(snap_dir)},
         )
 
         assert result.returncode != 0
-        assert 'target_id.txt' in result.stderr.lower()
+        assert "target_id.txt" in result.stderr.lower()
 
 
 def test_invalid_cdp_url_fails(chrome_test_url):
     """Test error with malformed CDP URL."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = snap_dir / 'chrome'
+        chrome_dir = snap_dir / "chrome"
         chrome_dir.mkdir()
-        (chrome_dir / 'cdp_url.txt').write_text('invalid-url')
-        (chrome_dir / 'target_id.txt').write_text('test-target')
-        (chrome_dir / 'navigation.json').write_text('{}')
+        (chrome_dir / "cdp_url.txt").write_text("invalid-url")
+        (chrome_dir / "target_id.txt").write_text("test-target")
+        (chrome_dir / "navigation.json").write_text("{}")
 
-        screenshot_dir = snap_dir / 'screenshot'
+        screenshot_dir = snap_dir / "screenshot"
         screenshot_dir.mkdir()
 
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=7,
-            env=get_test_env() | {'SNAP_DIR': str(snap_dir)}
+            env=get_test_env() | {"SNAP_DIR": str(snap_dir)},
         )
 
         assert result.returncode != 0
@@ -442,29 +541,37 @@ def test_invalid_timeout_uses_default(chrome_test_url):
     """Test that invalid SCREENSHOT_TIMEOUT falls back to default."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
+        snap_dir = tmpdir / "snap"
         snap_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = snap_dir / 'chrome'
+        chrome_dir = snap_dir / "chrome"
         chrome_dir.mkdir()
         # No navigation.json to trigger timeout
-        (chrome_dir / 'cdp_url.txt').write_text('ws://chrome-cdp.localhost:9222/test')
-        (chrome_dir / 'target_id.txt').write_text('test')
+        (chrome_dir / "cdp_url.txt").write_text("ws://chrome-cdp.localhost:9222/test")
+        (chrome_dir / "target_id.txt").write_text("test")
 
-        screenshot_dir = snap_dir / 'screenshot'
+        screenshot_dir = snap_dir / "screenshot"
         screenshot_dir.mkdir()
 
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
-        env['SCREENSHOT_TIMEOUT'] = 'invalid'  # Should fallback to default (10s becomes NaN, treated as 0)
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
+        env["SCREENSHOT_TIMEOUT"] = (
+            "invalid"  # Should fallback to default (10s becomes NaN, treated as 0)
+        )
 
         import time
+
         start = time.time()
         result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={chrome_test_url}', '--snapshot-id=test'],
+            [
+                "node",
+                str(SCREENSHOT_HOOK),
+                f"--url={chrome_test_url}",
+                "--snapshot-id=test",
+            ],
             cwd=str(screenshot_dir),
             capture_output=True,
             text=True,
             timeout=5,
-            env=env
+            env=env,
         )
         elapsed = time.time() - start
 
@@ -473,5 +580,5 @@ def test_invalid_timeout_uses_default(chrome_test_url):
         assert elapsed < 2  # Should fail quickly, not wait 10s
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py b/abx_plugins/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py
index fba8352..092c111 100755
--- a/abx_plugins/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py
+++ b/abx_plugins/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py
@@ -13,7 +13,7 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
@@ -21,23 +21,27 @@
 
 def main():
     # Only proceed if ripgrep backend is enabled
-    search_backend_engine = os.environ.get('SEARCH_BACKEND_ENGINE', 'ripgrep').strip()
-    if search_backend_engine != 'ripgrep':
+    search_backend_engine = os.environ.get("SEARCH_BACKEND_ENGINE", "ripgrep").strip()
+    if search_backend_engine != "ripgrep":
         # Not using ripgrep, exit successfully without output
         sys.exit(0)
 
-    machine_id = os.environ.get('MACHINE_ID', '')
-    print(json.dumps({
-        'type': 'Binary',
-        'name': 'rg',
-        'binproviders': 'apt,brew,env',
-        'overrides': {
-            'apt': {'packages': ['ripgrep']},
-        },
-        'machine_id': machine_id,
-    }))
+    machine_id = os.environ.get("MACHINE_ID", "")
+    print(
+        json.dumps(
+            {
+                "type": "Binary",
+                "name": "rg",
+                "binproviders": "apt,brew,env",
+                "overrides": {
+                    "apt": {"packages": ["ripgrep"]},
+                },
+                "machine_id": machine_id,
+            }
+        )
+    )
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/search_backend_ripgrep/search.py b/abx_plugins/plugins/search_backend_ripgrep/search.py
index 21a6031..18770f0 100755
--- a/abx_plugins/plugins/search_backend_ripgrep/search.py
+++ b/abx_plugins/plugins/search_backend_ripgrep/search.py
@@ -23,7 +23,7 @@
 from typing import Iterable, List
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
@@ -36,7 +36,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -49,7 +49,7 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
 
 
 def _get_archive_dir() -> Path:
-    snap_dir = os.environ.get('SNAP_DIR', '').strip()
+    snap_dir = os.environ.get("SNAP_DIR", "").strip()
     if snap_dir:
         return Path(snap_dir)
     return Path.cwd()
@@ -57,14 +57,16 @@ def _get_archive_dir() -> Path:
 
 def search(query: str) -> List[str]:
     """Search for snapshots using ripgrep."""
-    rg_binary = get_env('RIPGREP_BINARY', 'rg')
+    rg_binary = get_env("RIPGREP_BINARY", "rg")
     rg_binary = shutil.which(rg_binary) or rg_binary
     if not rg_binary or not Path(rg_binary).exists():
-        raise RuntimeError(f'ripgrep binary not found. Install with: apt install ripgrep')
+        raise RuntimeError(
+            "ripgrep binary not found. Install with: apt install ripgrep"
+        )
 
-    timeout = get_env_int('RIPGREP_TIMEOUT', 90)
-    ripgrep_args = get_env_array('RIPGREP_ARGS', [])
-    ripgrep_args_extra = get_env_array('RIPGREP_ARGS_EXTRA', [])
+    timeout = get_env_int("RIPGREP_TIMEOUT", 90)
+    ripgrep_args = get_env_array("RIPGREP_ARGS", [])
+    ripgrep_args_extra = get_env_array("RIPGREP_ARGS_EXTRA", [])
 
     archive_dir = _get_archive_dir()
     if not archive_dir.exists():
@@ -74,7 +76,7 @@ def search(query: str) -> List[str]:
         rg_binary,
         *ripgrep_args,
         *ripgrep_args_extra,
-        '--regexp',
+        "--regexp",
         query,
         str(archive_dir),
     ]
@@ -85,7 +87,7 @@ def search(query: str) -> List[str]:
         # Extract snapshot IDs from file paths
         # Paths look like: archive/<snapshot_id>/<extractor>/file.txt
         snapshot_ids = set()
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             if not line:
                 continue
             path = Path(line)
diff --git a/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py b/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
index 4d02f08..aa4fece 100644
--- a/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
+++ b/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
@@ -13,21 +13,20 @@
 import shutil
 import subprocess
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
 
 def test_ripgrep_hook_detects_binary_from_path():
     """Test that ripgrep hook finds binary using abx-pkg when env var is just a name."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
+    hook_path = Path(__file__).parent.parent / "on_Crawl__50_ripgrep_install.py"
 
-    assert shutil.which('rg'), "ripgrep not installed"
+    assert shutil.which("rg"), "ripgrep not installed"
 
     # Set SEARCH_BACKEND_ENGINE to enable the hook
     env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env['RIPGREP_BINARY'] = 'rg'  # Just the name, not the full path (this was the bug)
+    env["SEARCH_BACKEND_ENGINE"] = "ripgrep"
+    env["RIPGREP_BINARY"] = "rg"  # Just the name, not the full path (this was the bug)
 
     result = subprocess.run(
         [sys.executable, str(hook_path)],
@@ -40,21 +39,25 @@ def test_ripgrep_hook_detects_binary_from_path():
     assert result.returncode == 0, f"Hook failed: {result.stderr}"
 
     # Parse JSONL output (filter out non-JSON lines)
-    lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.strip().startswith('{')]
+    lines = [
+        line
+        for line in result.stdout.strip().split("\n")
+        if line.strip() and line.strip().startswith("{")
+    ]
     assert len(lines) >= 1, "Expected at least 1 JSONL line (Binary)"
 
     binary = json.loads(lines[0])
-    assert binary['type'] == 'Binary'
-    assert binary['name'] == 'rg'
-    assert 'binproviders' in binary, "Expected binproviders declaration"
+    assert binary["type"] == "Binary"
+    assert binary["name"] == "rg"
+    assert "binproviders" in binary, "Expected binproviders declaration"
 
 
 def test_ripgrep_hook_skips_when_backend_not_ripgrep():
     """Test that ripgrep hook exits silently when search backend is not ripgrep."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
+    hook_path = Path(__file__).parent.parent / "on_Crawl__50_ripgrep_install.py"
 
     env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'sqlite'  # Different backend
+    env["SEARCH_BACKEND_ENGINE"] = "sqlite"  # Different backend
 
     result = subprocess.run(
         [sys.executable, str(hook_path)],
@@ -64,20 +67,24 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep():
         timeout=10,
     )
 
-    assert result.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
-    assert result.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
+    assert result.returncode == 0, (
+        "Hook should exit successfully when backend is not ripgrep"
+    )
+    assert result.stdout.strip() == "", (
+        "Hook should produce no output when backend is not ripgrep"
+    )
 
 
 def test_ripgrep_hook_handles_absolute_path():
     """Test that ripgrep hook exits successfully when RIPGREP_BINARY is a valid absolute path."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
+    hook_path = Path(__file__).parent.parent / "on_Crawl__50_ripgrep_install.py"
 
-    rg_path = shutil.which('rg')
+    rg_path = shutil.which("rg")
     assert rg_path, "ripgrep not installed"
 
     env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env['RIPGREP_BINARY'] = rg_path  # Full absolute path
+    env["SEARCH_BACKEND_ENGINE"] = "ripgrep"
+    env["RIPGREP_BINARY"] = rg_path  # Full absolute path
 
     result = subprocess.run(
         [sys.executable, str(hook_path)],
@@ -87,8 +94,14 @@ def test_ripgrep_hook_handles_absolute_path():
         timeout=10,
     )
 
-    assert result.returncode == 0, f"Hook should exit successfully when binary already configured: {result.stderr}"
-    lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
+    assert result.returncode == 0, (
+        f"Hook should exit successfully when binary already configured: {result.stderr}"
+    )
+    lines = [
+        line
+        for line in result.stdout.strip().split("\n")
+        if line.strip().startswith("{")
+    ]
     assert lines, "Expected Binary JSONL output when backend is ripgrep"
 
 
@@ -102,14 +115,14 @@ def test_ripgrep_only_detected_when_backend_enabled():
     import sys
     from pathlib import Path
 
-    assert shutil.which('rg'), "ripgrep not installed"
+    assert shutil.which("rg"), "ripgrep not installed"
 
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
+    hook_path = Path(__file__).parent.parent / "on_Crawl__50_ripgrep_install.py"
 
     # Test 1: With ripgrep backend - should output Binary record
     env1 = os.environ.copy()
-    env1['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env1['RIPGREP_BINARY'] = 'rg'
+    env1["SEARCH_BACKEND_ENGINE"] = "ripgrep"
+    env1["RIPGREP_BINARY"] = "rg"
 
     result1 = subprocess.run(
         [sys.executable, str(hook_path)],
@@ -119,14 +132,16 @@ def test_ripgrep_only_detected_when_backend_enabled():
         timeout=10,
     )
 
-    assert result1.returncode == 0, f"Hook should succeed with ripgrep backend: {result1.stderr}"
+    assert result1.returncode == 0, (
+        f"Hook should succeed with ripgrep backend: {result1.stderr}"
+    )
     # Should output Binary JSONL when backend is ripgrep
-    assert 'Binary' in result1.stdout, "Should output Binary when backend=ripgrep"
+    assert "Binary" in result1.stdout, "Should output Binary when backend=ripgrep"
 
     # Test 2: With different backend - should output nothing
     env2 = os.environ.copy()
-    env2['SEARCH_BACKEND_ENGINE'] = 'sqlite'
-    env2['RIPGREP_BINARY'] = 'rg'
+    env2["SEARCH_BACKEND_ENGINE"] = "sqlite"
+    env2["RIPGREP_BINARY"] = "rg"
 
     result2 = subprocess.run(
         [sys.executable, str(hook_path)],
@@ -136,9 +151,13 @@ def test_ripgrep_only_detected_when_backend_enabled():
         timeout=10,
     )
 
-    assert result2.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
-    assert result2.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
+    assert result2.returncode == 0, (
+        "Hook should exit successfully when backend is not ripgrep"
+    )
+    assert result2.stdout.strip() == "", (
+        "Hook should produce no output when backend is not ripgrep"
+    )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py b/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py
index c074998..ca3a275 100644
--- a/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py
+++ b/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py
@@ -11,7 +11,6 @@
 
 import os
 import shutil
-import subprocess
 import tempfile
 from pathlib import Path
 from unittest.mock import patch
@@ -32,60 +31,60 @@ class TestEnvHelpers:
 
     def test_get_env_default(self):
         """get_env should return default for unset vars."""
-        result = get_env('NONEXISTENT_VAR_12345', 'default')
-        assert result == 'default'
+        result = get_env("NONEXISTENT_VAR_12345", "default")
+        assert result == "default"
 
     def test_get_env_set(self):
         """get_env should return value for set vars."""
-        with patch.dict(os.environ, {'TEST_VAR': 'value'}):
-            result = get_env('TEST_VAR', 'default')
-            assert result == 'value'
+        with patch.dict(os.environ, {"TEST_VAR": "value"}):
+            result = get_env("TEST_VAR", "default")
+            assert result == "value"
 
     def test_get_env_strips_whitespace(self):
         """get_env should strip whitespace."""
-        with patch.dict(os.environ, {'TEST_VAR': '  value  '}):
-            result = get_env('TEST_VAR', '')
-            assert result == 'value'
+        with patch.dict(os.environ, {"TEST_VAR": "  value  "}):
+            result = get_env("TEST_VAR", "")
+            assert result == "value"
 
     def test_get_env_int_default(self):
         """get_env_int should return default for unset vars."""
-        result = get_env_int('NONEXISTENT_VAR_12345', 42)
+        result = get_env_int("NONEXISTENT_VAR_12345", 42)
         assert result == 42
 
     def test_get_env_int_valid(self):
         """get_env_int should parse integer values."""
-        with patch.dict(os.environ, {'TEST_INT': '100'}):
-            result = get_env_int('TEST_INT', 0)
+        with patch.dict(os.environ, {"TEST_INT": "100"}):
+            result = get_env_int("TEST_INT", 0)
             assert result == 100
 
     def test_get_env_int_invalid(self):
         """get_env_int should return default for invalid integers."""
-        with patch.dict(os.environ, {'TEST_INT': 'not a number'}):
-            result = get_env_int('TEST_INT', 42)
+        with patch.dict(os.environ, {"TEST_INT": "not a number"}):
+            result = get_env_int("TEST_INT", 42)
             assert result == 42
 
     def test_get_env_array_default(self):
         """get_env_array should return default for unset vars."""
-        result = get_env_array('NONEXISTENT_VAR_12345', ['default'])
-        assert result == ['default']
+        result = get_env_array("NONEXISTENT_VAR_12345", ["default"])
+        assert result == ["default"]
 
     def test_get_env_array_valid(self):
         """get_env_array should parse JSON arrays."""
-        with patch.dict(os.environ, {'TEST_ARRAY': '["a", "b", "c"]'}):
-            result = get_env_array('TEST_ARRAY', [])
-            assert result == ['a', 'b', 'c']
+        with patch.dict(os.environ, {"TEST_ARRAY": '["a", "b", "c"]'}):
+            result = get_env_array("TEST_ARRAY", [])
+            assert result == ["a", "b", "c"]
 
     def test_get_env_array_invalid_json(self):
         """get_env_array should return default for invalid JSON."""
-        with patch.dict(os.environ, {'TEST_ARRAY': 'not json'}):
-            result = get_env_array('TEST_ARRAY', ['default'])
-            assert result == ['default']
+        with patch.dict(os.environ, {"TEST_ARRAY": "not json"}):
+            result = get_env_array("TEST_ARRAY", ["default"])
+            assert result == ["default"]
 
     def test_get_env_array_not_array(self):
         """get_env_array should return default for non-array JSON."""
-        with patch.dict(os.environ, {'TEST_ARRAY': '{"key": "value"}'}):
-            result = get_env_array('TEST_ARRAY', ['default'])
-            assert result == ['default']
+        with patch.dict(os.environ, {"TEST_ARRAY": '{"key": "value"}'}):
+            result = get_env_array("TEST_ARRAY", ["default"])
+            assert result == ["default"]
 
 
 class TestRipgrepFlush:
@@ -94,7 +93,7 @@ class TestRipgrepFlush:
     def test_flush_is_noop(self):
         """flush should be a no-op for ripgrep backend."""
         # Should not raise
-        flush(['snap-001', 'snap-002'])
+        flush(["snap-001", "snap-002"])
 
 
 class TestRipgrepSearch:
@@ -103,32 +102,41 @@ class TestRipgrepSearch:
     def setup_method(self, _method=None):
         """Create temporary archive directory with test files."""
         self.temp_dir = tempfile.mkdtemp()
-        self.archive_dir = Path(self.temp_dir) / 'archive'
+        self.archive_dir = Path(self.temp_dir) / "archive"
         self.archive_dir.mkdir()
 
         # Create snapshot directories with searchable content
-        self._create_snapshot('snap-001', {
-            'singlefile/index.html': '<html><body>Python programming tutorial</body></html>',
-            'title/title.txt': 'Learn Python Programming',
-        })
-        self._create_snapshot('snap-002', {
-            'singlefile/index.html': '<html><body>JavaScript guide</body></html>',
-            'title/title.txt': 'JavaScript Basics',
-        })
-        self._create_snapshot('snap-003', {
-            'wget/index.html': '<html><body>Web archiving guide and best practices</body></html>',
-            'title/title.txt': 'Web Archiving guide',
-        })
-
-        self._orig_snap_dir = os.environ.get('SNAP_DIR')
-        os.environ['SNAP_DIR'] = str(self.archive_dir)
+        self._create_snapshot(
+            "snap-001",
+            {
+                "singlefile/index.html": "<html><body>Python programming tutorial</body></html>",
+                "title/title.txt": "Learn Python Programming",
+            },
+        )
+        self._create_snapshot(
+            "snap-002",
+            {
+                "singlefile/index.html": "<html><body>JavaScript guide</body></html>",
+                "title/title.txt": "JavaScript Basics",
+            },
+        )
+        self._create_snapshot(
+            "snap-003",
+            {
+                "wget/index.html": "<html><body>Web archiving guide and best practices</body></html>",
+                "title/title.txt": "Web Archiving guide",
+            },
+        )
+
+        self._orig_snap_dir = os.environ.get("SNAP_DIR")
+        os.environ["SNAP_DIR"] = str(self.archive_dir)
 
     def teardown_method(self, _method=None):
         """Clean up temporary directory."""
         if self._orig_snap_dir is None:
-            os.environ.pop('SNAP_DIR', None)
+            os.environ.pop("SNAP_DIR", None)
         else:
-            os.environ['SNAP_DIR'] = self._orig_snap_dir
+            os.environ["SNAP_DIR"] = self._orig_snap_dir
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def _create_snapshot(self, snapshot_id: str, files: dict):
@@ -141,36 +149,36 @@ def _create_snapshot(self, snapshot_id: str, files: dict):
 
     def _has_ripgrep(self) -> bool:
         """Check if ripgrep is available."""
-        return shutil.which('rg') is not None
+        return shutil.which("rg") is not None
 
     def test_search_no_archive_dir(self):
         """search should return empty list when archive dir doesn't exist."""
-        os.environ['SNAP_DIR'] = '/nonexistent/path'
-        results = search('test')
+        os.environ["SNAP_DIR"] = "/nonexistent/path"
+        results = search("test")
         assert results == []
 
     def test_search_single_match(self):
         """search should find matching snapshot."""
-        results = search('Python programming')
+        results = search("Python programming")
 
-        assert 'snap-001' in results
-        assert 'snap-002' not in results
-        assert 'snap-003' not in results
+        assert "snap-001" in results
+        assert "snap-002" not in results
+        assert "snap-003" not in results
 
     def test_search_multiple_matches(self):
         """search should find all matching snapshots."""
         # 'guide' appears in snap-002 (JavaScript guide) and snap-003 (Archiving Guide)
-        results = search('guide')
+        results = search("guide")
 
-        assert 'snap-002' in results
-        assert 'snap-003' in results
-        assert 'snap-001' not in results
+        assert "snap-002" in results
+        assert "snap-003" in results
+        assert "snap-001" not in results
 
     def test_search_case_insensitive_by_default(self):
         """search should be case-sensitive (ripgrep default)."""
         # By default rg is case-sensitive
-        results_upper = search('PYTHON')
-        results_lower = search('python')
+        results_upper = search("PYTHON")
+        results_lower = search("python")
 
         # Depending on ripgrep config, results may differ
         assert isinstance(results_upper, list)
@@ -178,44 +186,44 @@ def test_search_case_insensitive_by_default(self):
 
     def test_search_no_results(self):
         """search should return empty list for no matches."""
-        results = search('xyznonexistent123')
+        results = search("xyznonexistent123")
         assert results == []
 
     def test_search_regex(self):
         """search should support regex patterns."""
-        results = search('(Python|JavaScript)')
+        results = search("(Python|JavaScript)")
 
-        assert 'snap-001' in results
-        assert 'snap-002' in results
+        assert "snap-001" in results
+        assert "snap-002" in results
 
     def test_search_distinct_snapshots(self):
         """search should return distinct snapshot IDs."""
         # Query matches both files in snap-001
-        results = search('Python')
+        results = search("Python")
 
         # Should only appear once
-        assert results.count('snap-001') == 1
+        assert results.count("snap-001") == 1
 
     def test_search_missing_binary(self):
         """search should raise when ripgrep binary not found."""
-        with patch.dict(os.environ, {'RIPGREP_BINARY': '/nonexistent/rg'}):
-            with patch('shutil.which', return_value=None):
+        with patch.dict(os.environ, {"RIPGREP_BINARY": "/nonexistent/rg"}):
+            with patch("shutil.which", return_value=None):
                 with pytest.raises(RuntimeError) as context:
-                    search('test')
-                assert 'ripgrep binary not found' in str(context.value)
+                    search("test")
+                assert "ripgrep binary not found" in str(context.value)
 
     def test_search_with_custom_args(self):
         """search should use custom RIPGREP_ARGS."""
-        with patch.dict(os.environ, {'RIPGREP_ARGS': '["-i"]'}):  # Case insensitive
-            results = search('PYTHON')
+        with patch.dict(os.environ, {"RIPGREP_ARGS": '["-i"]'}):  # Case insensitive
+            results = search("PYTHON")
             # With -i flag, should find regardless of case
-            assert 'snap-001' in results
+            assert "snap-001" in results
 
     def test_search_timeout(self):
         """search should handle timeout gracefully."""
-        with patch.dict(os.environ, {'RIPGREP_TIMEOUT': '1'}):
+        with patch.dict(os.environ, {"RIPGREP_TIMEOUT": "1"}):
             # Short timeout, should still complete for small archive
-            results = search('Python')
+            results = search("Python")
             assert isinstance(results, list)
 
 
@@ -225,12 +233,14 @@ class TestRipgrepSearchIntegration:
     def setup_method(self, _method=None):
         """Create archive with realistic structure."""
         self.temp_dir = tempfile.mkdtemp()
-        self.archive_dir = Path(self.temp_dir) / 'archive'
+        self.archive_dir = Path(self.temp_dir) / "archive"
         self.archive_dir.mkdir()
 
         # Realistic snapshot structure
-        self._create_snapshot('1704067200.123456', {  # 2024-01-01
-            'singlefile.html': '''<!DOCTYPE html>
+        self._create_snapshot(
+            "1704067200.123456",
+            {  # 2024-01-01
+                "singlefile.html": """<!DOCTYPE html>
 <html>
 <head><title>ArchiveBox Documentation</title></head>
 <body>
@@ -238,30 +248,34 @@ def setup_method(self, _method=None):
 <p>ArchiveBox is a powerful, self-hosted web archiving tool.</p>
 <p>Install with: pip install archivebox</p>
 </body>
-</html>''',
-            'title/title.txt': 'ArchiveBox Documentation',
-            'screenshot/screenshot.png': b'PNG IMAGE DATA',  # Binary file
-        })
-        self._create_snapshot('1704153600.654321', {  # 2024-01-02
-            'wget/index.html': '''<html>
+</html>""",
+                "title/title.txt": "ArchiveBox Documentation",
+                "screenshot/screenshot.png": b"PNG IMAGE DATA",  # Binary file
+            },
+        )
+        self._create_snapshot(
+            "1704153600.654321",
+            {  # 2024-01-02
+                "wget/index.html": """<html>
 <head><title>Python News</title></head>
 <body>
 <h1>Python 3.12 Released</h1>
 <p>New features include improved error messages and performance.</p>
 </body>
-</html>''',
-            'readability/content.html': '<p>Python 3.12 has been released with exciting new features.</p>',
-        })
+</html>""",
+                "readability/content.html": "<p>Python 3.12 has been released with exciting new features.</p>",
+            },
+        )
 
-        self._orig_snap_dir = os.environ.get('SNAP_DIR')
-        os.environ['SNAP_DIR'] = str(self.archive_dir)
+        self._orig_snap_dir = os.environ.get("SNAP_DIR")
+        os.environ["SNAP_DIR"] = str(self.archive_dir)
 
     def teardown_method(self, _method=None):
         """Clean up."""
         if self._orig_snap_dir is None:
-            os.environ.pop('SNAP_DIR', None)
+            os.environ.pop("SNAP_DIR", None)
         else:
-            os.environ['SNAP_DIR'] = self._orig_snap_dir
+            os.environ["SNAP_DIR"] = self._orig_snap_dir
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def _create_snapshot(self, timestamp: str, files: dict):
@@ -277,19 +291,19 @@ def _create_snapshot(self, timestamp: str, files: dict):
 
     def test_search_archivebox(self):
         """Search for archivebox should find documentation snapshot."""
-        results = search('archivebox')
-        assert '1704067200.123456' in results
+        results = search("archivebox")
+        assert "1704067200.123456" in results
 
     def test_search_python(self):
         """Search for python should find Python news snapshot."""
-        results = search('Python')
-        assert '1704153600.654321' in results
+        results = search("Python")
+        assert "1704153600.654321" in results
 
     def test_search_pip_install(self):
         """Search for installation command."""
-        results = search('pip install')
-        assert '1704067200.123456' in results
+        results = search("pip install")
+        assert "1704067200.123456" in results
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py b/abx_plugins/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
index 2a7b72a..18db6e4 100755
--- a/abx_plugins/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
+++ b/abx_plugins/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
@@ -24,46 +24,47 @@
     SONIC_BUCKET: Bucket name (default: snapshots)
 """
 
-import json
 import os
 import re
 import sys
+from importlib import import_module
 from pathlib import Path
+from typing import Any
 
 import rich_click as click
 
 
 # Extractor metadata
-PLUGIN_NAME = 'index_sonic'
+PLUGIN_NAME = "index_sonic"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 # Text file patterns to index
 INDEXABLE_FILES = [
-    ('readability', 'content.txt'),
-    ('readability', 'content.html'),
-    ('mercury', 'content.txt'),
-    ('mercury', 'content.html'),
-    ('htmltotext', 'output.txt'),
-    ('singlefile', 'singlefile.html'),
-    ('dom', 'output.html'),
-    ('wget', '**/*.html'),
-    ('wget', '**/*.htm'),
-    ('title', 'title.txt'),
+    ("readability", "content.txt"),
+    ("readability", "content.html"),
+    ("mercury", "content.txt"),
+    ("mercury", "content.html"),
+    ("htmltotext", "output.txt"),
+    ("singlefile", "singlefile.html"),
+    ("dom", "output.html"),
+    ("wget", "**/*.html"),
+    ("wget", "**/*.htm"),
+    ("title", "title.txt"),
 ]
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -77,13 +78,15 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def strip_html_tags(html: str) -> str:
     """Remove HTML tags, keeping text content."""
-    html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<[^>]+>', ' ', html)
-    html = html.replace('&nbsp;', ' ').replace('&amp;', '&')
-    html = html.replace('&lt;', '<').replace('&gt;', '>')
-    html = html.replace('&quot;', '"')
-    html = re.sub(r'\s+', ' ', html)
+    html = re.sub(
+        r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE
+    )
+    html = re.sub(r"<style[^>]*>.*?</style>", "", html, flags=re.DOTALL | re.IGNORECASE)
+    html = re.sub(r"<[^>]+>", " ", html)
+    html = html.replace("&nbsp;", " ").replace("&amp;", "&")
+    html = html.replace("&lt;", "<").replace("&gt;", ">")
+    html = html.replace("&quot;", '"')
+    html = re.sub(r"\s+", " ", html)
     return html.strip()
 
 
@@ -97,7 +100,7 @@ def find_indexable_content() -> list[tuple[str, str]]:
         if not plugin_dir.exists():
             continue
 
-        if '*' in file_pattern:
+        if "*" in file_pattern:
             matches = list(plugin_dir.glob(file_pattern))
         else:
             match = plugin_dir / file_pattern
@@ -106,11 +109,11 @@ def find_indexable_content() -> list[tuple[str, str]]:
         for match in matches:
             if match.is_file() and match.stat().st_size > 0:
                 try:
-                    content = match.read_text(encoding='utf-8', errors='ignore')
+                    content = match.read_text(encoding="utf-8", errors="ignore")
                     if content.strip():
-                        if match.suffix in ('.html', '.htm'):
+                        if match.suffix in (".html", ".htm"):
                             content = strip_html_tags(content)
-                        results.append((f'{extractor}/{match.name}', content))
+                        results.append((f"{extractor}/{match.name}", content))
                 except Exception:
                     continue
 
@@ -120,82 +123,82 @@ def find_indexable_content() -> list[tuple[str, str]]:
 def get_sonic_config() -> dict:
     """Get Sonic connection configuration."""
     return {
-        'host': get_env('SEARCH_BACKEND_HOST_NAME', '127.0.0.1'),
-        'port': get_env_int('SEARCH_BACKEND_PORT', 1491),
-        'password': get_env('SEARCH_BACKEND_PASSWORD', 'SecretPassword'),
-        'collection': get_env('SONIC_COLLECTION', 'archivebox'),
-        'bucket': get_env('SONIC_BUCKET', 'snapshots'),
+        "host": get_env("SEARCH_BACKEND_HOST_NAME", "127.0.0.1"),
+        "port": get_env_int("SEARCH_BACKEND_PORT", 1491),
+        "password": get_env("SEARCH_BACKEND_PASSWORD", "SecretPassword"),
+        "collection": get_env("SONIC_COLLECTION", "archivebox"),
+        "bucket": get_env("SONIC_BUCKET", "snapshots"),
     }
 
 
 def index_in_sonic(snapshot_id: str, texts: list[str]) -> None:
     """Index texts in Sonic."""
     try:
-        from sonic import IngestClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
+        sonic = import_module("sonic")
+    except ModuleNotFoundError:
+        raise RuntimeError("sonic-client not installed. Run: pip install sonic-client")
+    ingest_client: Any = sonic.IngestClient
 
     config = get_sonic_config()
 
-    with IngestClient(config['host'], config['port'], config['password']) as ingest:
+    with ingest_client(config["host"], config["port"], config["password"]) as ingest:
         # Flush existing content
         try:
-            ingest.flush_object(config['collection'], config['bucket'], snapshot_id)
+            ingest.flush_object(config["collection"], config["bucket"], snapshot_id)
         except Exception:
             pass
 
         # Index new content in chunks (Sonic has size limits)
-        content = ' '.join(texts)
+        content = " ".join(texts)
         chunk_size = 10000
         for i in range(0, len(content), chunk_size):
-            chunk = content[i:i + chunk_size]
-            ingest.push(config['collection'], config['bucket'], snapshot_id, chunk)
+            chunk = content[i : i + chunk_size]
+            ingest.push(config["collection"], config["bucket"], snapshot_id, chunk)
 
 
 @click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL that was archived")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Index snapshot content in Sonic."""
 
-    output = None
-    status = 'failed'
-    error = ''
-    indexed_sources = []
+    status = "failed"
+    error = ""
 
     try:
         # Check if this backend is enabled (permanent skips - don't retry)
-        backend = get_env('SEARCH_BACKEND_ENGINE', 'sqlite')
-        if backend != 'sonic':
-            print(f'Skipping Sonic indexing (SEARCH_BACKEND_ENGINE={backend})', file=sys.stderr)
+        backend = get_env("SEARCH_BACKEND_ENGINE", "sqlite")
+        if backend != "sonic":
+            print(
+                f"Skipping Sonic indexing (SEARCH_BACKEND_ENGINE={backend})",
+                file=sys.stderr,
+            )
             sys.exit(0)  # Permanent skip - different backend selected
-        if not get_env_bool('USE_INDEXING_BACKEND', True):
-            print('Skipping indexing (USE_INDEXING_BACKEND=False)', file=sys.stderr)
+        if not get_env_bool("USE_INDEXING_BACKEND", True):
+            print("Skipping indexing (USE_INDEXING_BACKEND=False)", file=sys.stderr)
             sys.exit(0)  # Permanent skip - indexing disabled
         else:
             contents = find_indexable_content()
-            indexed_sources = [source for source, _ in contents]
 
             if not contents:
-                status = 'skipped'
-                print('No indexable content found', file=sys.stderr)
+                status = "skipped"
+                print("No indexable content found", file=sys.stderr)
             else:
                 texts = [content for _, content in contents]
                 index_in_sonic(snapshot_id, texts)
-                status = 'succeeded'
-                output = OUTPUT_DIR
+                status = "succeeded"
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
 
     if error:
-        print(f'ERROR: {error}', file=sys.stderr)
+        print(f"ERROR: {error}", file=sys.stderr)
 
     # Search indexing hooks don't emit ArchiveResult - they're utility hooks
     # Exit code indicates success/failure
-    sys.exit(0 if status == 'succeeded' else 1)
+    sys.exit(0 if status == "succeeded" else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/search_backend_sonic/search.py b/abx_plugins/plugins/search_backend_sonic/search.py
index 0a4410f..ffa35b6 100755
--- a/abx_plugins/plugins/search_backend_sonic/search.py
+++ b/abx_plugins/plugins/search_backend_sonic/search.py
@@ -11,46 +11,55 @@
 # This module provides the search interface for the Sonic backend.
 
 import os
-from typing import List, Iterable
+from importlib import import_module
+from typing import Any, Iterable, List
 
 
 def get_sonic_config() -> dict:
     """Get Sonic connection configuration."""
     return {
-        'host': os.environ.get('SEARCH_BACKEND_HOST_NAME', '127.0.0.1').strip(),
-        'port': int(os.environ.get('SEARCH_BACKEND_PORT', '1491')),
-        'password': os.environ.get('SEARCH_BACKEND_PASSWORD', 'SecretPassword').strip(),
-        'collection': os.environ.get('SONIC_COLLECTION', 'archivebox').strip(),
-        'bucket': os.environ.get('SONIC_BUCKET', 'snapshots').strip(),
+        "host": os.environ.get("SEARCH_BACKEND_HOST_NAME", "127.0.0.1").strip(),
+        "port": int(os.environ.get("SEARCH_BACKEND_PORT", "1491")),
+        "password": os.environ.get("SEARCH_BACKEND_PASSWORD", "SecretPassword").strip(),
+        "collection": os.environ.get("SONIC_COLLECTION", "archivebox").strip(),
+        "bucket": os.environ.get("SONIC_BUCKET", "snapshots").strip(),
     }
 
 
 def search(query: str) -> List[str]:
     """Search for snapshots in Sonic."""
     try:
-        from sonic import SearchClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
+        sonic = import_module("sonic")
+    except ModuleNotFoundError:
+        raise RuntimeError("sonic-client not installed. Run: pip install sonic-client")
+    search_client_cls: Any = sonic.SearchClient
 
     config = get_sonic_config()
 
-    with SearchClient(config['host'], config['port'], config['password']) as search_client:
-        results = search_client.query(config['collection'], config['bucket'], query, limit=100)
+    with search_client_cls(
+        config["host"], config["port"], config["password"]
+    ) as search_client:
+        results = search_client.query(
+            config["collection"], config["bucket"], query, limit=100
+        )
         return results
 
 
 def flush(snapshot_ids: Iterable[str]) -> None:
     """Remove snapshots from Sonic index."""
     try:
-        from sonic import IngestClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
+        sonic = import_module("sonic")
+    except ModuleNotFoundError:
+        raise RuntimeError("sonic-client not installed. Run: pip install sonic-client")
+    ingest_client_cls: Any = sonic.IngestClient
 
     config = get_sonic_config()
 
-    with IngestClient(config['host'], config['port'], config['password']) as ingest:
+    with ingest_client_cls(
+        config["host"], config["port"], config["password"]
+    ) as ingest:
         for snapshot_id in snapshot_ids:
             try:
-                ingest.flush_object(config['collection'], config['bucket'], snapshot_id)
+                ingest.flush_object(config["collection"], config["bucket"], snapshot_id)
             except Exception:
                 pass
diff --git a/abx_plugins/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py b/abx_plugins/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
index 31ba1bf..c45c497 100755
--- a/abx_plugins/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
+++ b/abx_plugins/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
@@ -22,7 +22,6 @@
     SNAP_DIR: Snapshot directory (default: cwd)
 """
 
-import json
 import os
 import re
 import sqlite3
@@ -33,49 +32,51 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'index_sqlite'
+PLUGIN_NAME = "index_sqlite"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 # Text file patterns to index, in priority order
 INDEXABLE_FILES = [
-    ('readability', 'content.txt'),
-    ('readability', 'content.html'),
-    ('mercury', 'content.txt'),
-    ('mercury', 'content.html'),
-    ('htmltotext', 'output.txt'),
-    ('singlefile', 'singlefile.html'),
-    ('dom', 'output.html'),
-    ('wget', '**/*.html'),
-    ('wget', '**/*.htm'),
-    ('title', 'title.txt'),
+    ("readability", "content.txt"),
+    ("readability", "content.html"),
+    ("mercury", "content.txt"),
+    ("mercury", "content.html"),
+    ("htmltotext", "output.txt"),
+    ("singlefile", "singlefile.html"),
+    ("dom", "output.html"),
+    ("wget", "**/*.html"),
+    ("wget", "**/*.htm"),
+    ("title", "title.txt"),
 ]
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
 def strip_html_tags(html: str) -> str:
     """Remove HTML tags, keeping text content."""
-    html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<[^>]+>', ' ', html)
-    html = html.replace('&nbsp;', ' ').replace('&amp;', '&')
-    html = html.replace('&lt;', '<').replace('&gt;', '>')
-    html = html.replace('&quot;', '"')
-    html = re.sub(r'\s+', ' ', html)
+    html = re.sub(
+        r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE
+    )
+    html = re.sub(r"<style[^>]*>.*?</style>", "", html, flags=re.DOTALL | re.IGNORECASE)
+    html = re.sub(r"<[^>]+>", " ", html)
+    html = html.replace("&nbsp;", " ").replace("&amp;", "&")
+    html = html.replace("&lt;", "<").replace("&gt;", ">")
+    html = html.replace("&quot;", '"')
+    html = re.sub(r"\s+", " ", html)
     return html.strip()
 
 
@@ -89,7 +90,7 @@ def find_indexable_content() -> list[tuple[str, str]]:
         if not plugin_dir.exists():
             continue
 
-        if '*' in file_pattern:
+        if "*" in file_pattern:
             matches = list(plugin_dir.glob(file_pattern))
         else:
             match = plugin_dir / file_pattern
@@ -98,11 +99,11 @@ def find_indexable_content() -> list[tuple[str, str]]:
         for match in matches:
             if match.is_file() and match.stat().st_size > 0:
                 try:
-                    content = match.read_text(encoding='utf-8', errors='ignore')
+                    content = match.read_text(encoding="utf-8", errors="ignore")
                     if content.strip():
-                        if match.suffix in ('.html', '.htm'):
+                        if match.suffix in (".html", ".htm"):
                             content = strip_html_tags(content)
-                        results.append((f'{extractor}/{match.name}', content))
+                        results.append((f"{extractor}/{match.name}", content))
                 except Exception:
                     continue
 
@@ -111,32 +112,32 @@ def find_indexable_content() -> list[tuple[str, str]]:
 
 def get_db_path() -> Path:
     """Get path to the search index database."""
-    snap_dir = get_env('SNAP_DIR', str(Path.cwd().parent))
-    db_name = get_env('SQLITEFTS_DB', 'search.sqlite3')
+    snap_dir = get_env("SNAP_DIR", str(Path.cwd().parent))
+    db_name = get_env("SQLITEFTS_DB", "search.sqlite3")
     return Path(snap_dir) / db_name
 
 
 def index_in_sqlite(snapshot_id: str, texts: list[str]) -> None:
     """Index texts in SQLite FTS5."""
     db_path = get_db_path()
-    tokenizers = get_env('FTS_TOKENIZERS', 'porter unicode61 remove_diacritics 2')
+    tokenizers = get_env("FTS_TOKENIZERS", "porter unicode61 remove_diacritics 2")
     conn = sqlite3.connect(str(db_path))
 
     try:
         # Create FTS5 table if needed
-        conn.execute(f'''
+        conn.execute(f"""
             CREATE VIRTUAL TABLE IF NOT EXISTS search_index
             USING fts5(snapshot_id, content, tokenize='{tokenizers}')
-        ''')
+        """)
 
         # Remove existing entries
-        conn.execute('DELETE FROM search_index WHERE snapshot_id = ?', (snapshot_id,))
+        conn.execute("DELETE FROM search_index WHERE snapshot_id = ?", (snapshot_id,))
 
         # Insert new content
-        content = '\n\n'.join(texts)
+        content = "\n\n".join(texts)
         conn.execute(
-            'INSERT INTO search_index (snapshot_id, content) VALUES (?, ?)',
-            (snapshot_id, content)
+            "INSERT INTO search_index (snapshot_id, content) VALUES (?, ?)",
+            (snapshot_id, content),
         )
         conn.commit()
     finally:
@@ -144,49 +145,48 @@ def index_in_sqlite(snapshot_id: str, texts: list[str]) -> None:
 
 
 @click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL that was archived")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Index snapshot content in SQLite FTS5."""
 
-    output = None
-    status = 'failed'
-    error = ''
-    indexed_sources = []
+    status = "failed"
+    error = ""
 
     try:
         # Check if this backend is enabled (permanent skips - don't retry)
-        backend = get_env('SEARCH_BACKEND_ENGINE', 'sqlite')
-        if backend != 'sqlite':
-            print(f'Skipping SQLite indexing (SEARCH_BACKEND_ENGINE={backend})', file=sys.stderr)
+        backend = get_env("SEARCH_BACKEND_ENGINE", "sqlite")
+        if backend != "sqlite":
+            print(
+                f"Skipping SQLite indexing (SEARCH_BACKEND_ENGINE={backend})",
+                file=sys.stderr,
+            )
             sys.exit(0)  # Permanent skip - different backend selected
-        if not get_env_bool('USE_INDEXING_BACKEND', True):
-            print('Skipping indexing (USE_INDEXING_BACKEND=False)', file=sys.stderr)
+        if not get_env_bool("USE_INDEXING_BACKEND", True):
+            print("Skipping indexing (USE_INDEXING_BACKEND=False)", file=sys.stderr)
             sys.exit(0)  # Permanent skip - indexing disabled
         else:
             contents = find_indexable_content()
-            indexed_sources = [source for source, _ in contents]
 
             if not contents:
-                status = 'skipped'
-                print('No indexable content found', file=sys.stderr)
+                status = "skipped"
+                print("No indexable content found", file=sys.stderr)
             else:
                 texts = [content for _, content in contents]
                 index_in_sqlite(snapshot_id, texts)
-                status = 'succeeded'
-                output = OUTPUT_DIR
+                status = "succeeded"
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
 
     if error:
-        print(f'ERROR: {error}', file=sys.stderr)
+        print(f"ERROR: {error}", file=sys.stderr)
 
     # Search indexing hooks don't emit ArchiveResult - they're utility hooks
     # Exit code indicates success/failure
-    sys.exit(0 if status == 'succeeded' else 1)
+    sys.exit(0 if status == "succeeded" else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/search_backend_sqlite/search.py b/abx_plugins/plugins/search_backend_sqlite/search.py
index 7e733fc..0d187cf 100755
--- a/abx_plugins/plugins/search_backend_sqlite/search.py
+++ b/abx_plugins/plugins/search_backend_sqlite/search.py
@@ -21,13 +21,19 @@
 
 
 # Config with old var names for backwards compatibility
-SQLITEFTS_DB = os.environ.get('SQLITEFTS_DB', 'search.sqlite3').strip()
-FTS_SEPARATE_DATABASE = os.environ.get('FTS_SEPARATE_DATABASE', 'true').lower() in ('true', '1', 'yes')
-FTS_TOKENIZERS = os.environ.get('FTS_TOKENIZERS', 'porter unicode61 remove_diacritics 2').strip()
+SQLITEFTS_DB = os.environ.get("SQLITEFTS_DB", "search.sqlite3").strip()
+FTS_SEPARATE_DATABASE = os.environ.get("FTS_SEPARATE_DATABASE", "true").lower() in (
+    "true",
+    "1",
+    "yes",
+)
+FTS_TOKENIZERS = os.environ.get(
+    "FTS_TOKENIZERS", "porter unicode61 remove_diacritics 2"
+).strip()
 
 
 def _get_data_dir() -> Path:
-    data_dir = os.environ.get('SNAP_DIR', '').strip()
+    data_dir = os.environ.get("SNAP_DIR", "").strip()
     if data_dir:
         return Path(data_dir)
     return Path.cwd()
@@ -47,8 +53,8 @@ def search(query: str) -> List[str]:
     conn = sqlite3.connect(str(db_path))
     try:
         cursor = conn.execute(
-            'SELECT DISTINCT snapshot_id FROM search_index WHERE search_index MATCH ?',
-            (query,)
+            "SELECT DISTINCT snapshot_id FROM search_index WHERE search_index MATCH ?",
+            (query,),
         )
         return [row[0] for row in cursor.fetchall()]
     except sqlite3.OperationalError:
@@ -67,7 +73,9 @@ def flush(snapshot_ids: Iterable[str]) -> None:
     conn = sqlite3.connect(str(db_path))
     try:
         for snapshot_id in snapshot_ids:
-            conn.execute('DELETE FROM search_index WHERE snapshot_id = ?', (snapshot_id,))
+            conn.execute(
+                "DELETE FROM search_index WHERE snapshot_id = ?", (snapshot_id,)
+            )
         conn.commit()
     except sqlite3.OperationalError:
         pass  # Table doesn't exist
diff --git a/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_search.py b/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_search.py
index cc617b3..266136d 100644
--- a/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_search.py
+++ b/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_search.py
@@ -33,8 +33,8 @@ def setup_method(self, _method=None):
         self.temp_dir = tempfile.mkdtemp()
         self.db_path = Path(self.temp_dir) / SQLITEFTS_DB
 
-        self._orig_data_dir = os.environ.get('SNAP_DIR')
-        os.environ['SNAP_DIR'] = self.temp_dir
+        self._orig_data_dir = os.environ.get("SNAP_DIR")
+        os.environ["SNAP_DIR"] = self.temp_dir
 
         # Create FTS5 table
         self._create_index()
@@ -42,17 +42,18 @@ def setup_method(self, _method=None):
     def teardown_method(self, _method=None):
         """Clean up temporary directory."""
         if self._orig_data_dir is None:
-            os.environ.pop('SNAP_DIR', None)
+            os.environ.pop("SNAP_DIR", None)
         else:
-            os.environ['SNAP_DIR'] = self._orig_data_dir
+            os.environ["SNAP_DIR"] = self._orig_data_dir
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def _create_index(self):
         """Create the FTS5 search index table."""
         conn = sqlite3.connect(str(self.db_path))
         try:
-            conn.execute(f'''
+            conn.execute(f"""
                 CREATE VIRTUAL TABLE IF NOT EXISTS search_index
                 USING fts5(
                     snapshot_id,
@@ -61,7 +62,7 @@ def _create_index(self):
                     content,
                     tokenize = '{FTS_TOKENIZERS}'
                 )
-            ''')
+            """)
             conn.commit()
         finally:
             conn.close()
@@ -71,8 +72,8 @@ def _index_snapshot(self, snapshot_id: str, url: str, title: str, content: str):
         conn = sqlite3.connect(str(self.db_path))
         try:
             conn.execute(
-                'INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)',
-                (snapshot_id, url, title, content)
+                "INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)",
+                (snapshot_id, url, title, content),
             )
             conn.commit()
         finally:
@@ -85,161 +86,200 @@ def test_get_db_path(self):
 
     def test_search_empty_index(self):
         """search should return empty list for empty index."""
-        results = search('nonexistent')
+        results = search("nonexistent")
         assert results == []
 
     def test_search_no_index_file(self):
         """search should return empty list when index file doesn't exist."""
         os.remove(self.db_path)
-        results = search('test')
+        results = search("test")
         assert results == []
 
     def test_search_single_result(self):
         """search should find matching snapshot."""
         self._index_snapshot(
-            'snap-001',
-            'https://example.com/page1',
-            'Example Page',
-            'This is example content about testing.'
+            "snap-001",
+            "https://example.com/page1",
+            "Example Page",
+            "This is example content about testing.",
         )
 
-        results = search('example')
+        results = search("example")
         assert len(results) == 1
-        assert results[0] == 'snap-001'
+        assert results[0] == "snap-001"
 
     def test_search_multiple_results(self):
         """search should find all matching snapshots."""
-        self._index_snapshot('snap-001', 'https://example.com/1', 'Python Tutorial', 'Learn Python programming')
-        self._index_snapshot('snap-002', 'https://example.com/2', 'Python Guide', 'Advanced Python concepts')
-        self._index_snapshot('snap-003', 'https://example.com/3', 'JavaScript Basics', 'Learn JavaScript')
+        self._index_snapshot(
+            "snap-001",
+            "https://example.com/1",
+            "Python Tutorial",
+            "Learn Python programming",
+        )
+        self._index_snapshot(
+            "snap-002",
+            "https://example.com/2",
+            "Python Guide",
+            "Advanced Python concepts",
+        )
+        self._index_snapshot(
+            "snap-003", "https://example.com/3", "JavaScript Basics", "Learn JavaScript"
+        )
 
-        results = search('Python')
+        results = search("Python")
         assert len(results) == 2
-        assert 'snap-001' in results
-        assert 'snap-002' in results
-        assert 'snap-003' not in results
+        assert "snap-001" in results
+        assert "snap-002" in results
+        assert "snap-003" not in results
 
     def test_search_title_match(self):
         """search should match against title."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Django Web Framework', 'Content here')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "Django Web Framework", "Content here"
+        )
 
-        results = search('Django')
+        results = search("Django")
         assert len(results) == 1
-        assert results[0] == 'snap-001'
+        assert results[0] == "snap-001"
 
     def test_search_url_match(self):
         """search should match against URL."""
-        self._index_snapshot('snap-001', 'https://archivebox.io/docs', 'Title', 'Content')
+        self._index_snapshot(
+            "snap-001", "https://archivebox.io/docs", "Title", "Content"
+        )
 
-        results = search('archivebox')
+        results = search("archivebox")
         assert len(results) == 1
 
     def test_search_content_match(self):
         """search should match against content."""
         self._index_snapshot(
-            'snap-001',
-            'https://example.com',
-            'Generic Title',
-            'This document contains information about cryptography and security.'
+            "snap-001",
+            "https://example.com",
+            "Generic Title",
+            "This document contains information about cryptography and security.",
         )
 
-        results = search('cryptography')
+        results = search("cryptography")
         assert len(results) == 1
 
     def test_search_case_insensitive(self):
         """search should be case insensitive."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'PYTHON programming')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "Title", "PYTHON programming"
+        )
 
-        results = search('python')
+        results = search("python")
         assert len(results) == 1
 
     def test_search_stemming(self):
         """search should use porter stemmer for word stems."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Programming concepts')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "Title", "Programming concepts"
+        )
 
         # 'program' should match 'programming' with porter stemmer
-        results = search('program')
+        results = search("program")
         assert len(results) == 1
 
     def test_search_multiple_words(self):
         """search should match documents with all words."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Web Development', 'Learn web development skills')
-        self._index_snapshot('snap-002', 'https://example.com', 'Web Design', 'Design beautiful websites')
+        self._index_snapshot(
+            "snap-001",
+            "https://example.com",
+            "Web Development",
+            "Learn web development skills",
+        )
+        self._index_snapshot(
+            "snap-002", "https://example.com", "Web Design", "Design beautiful websites"
+        )
 
-        results = search('web development')
+        results = search("web development")
         # FTS5 defaults to OR, so both might match
         # With porter stemmer, both should match 'web'
-        assert 'snap-001' in results
+        assert "snap-001" in results
 
     def test_search_phrase(self):
         """search should support phrase queries."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'machine learning algorithms')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'machine algorithms learning')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "Title", "machine learning algorithms"
+        )
+        self._index_snapshot(
+            "snap-002", "https://example.com", "Title", "machine algorithms learning"
+        )
 
         # Phrase search with quotes
         results = search('"machine learning"')
         assert len(results) == 1
-        assert results[0] == 'snap-001'
+        assert results[0] == "snap-001"
 
     def test_search_distinct_results(self):
         """search should return distinct snapshot IDs."""
         # Index same snapshot twice (could happen with multiple fields matching)
-        self._index_snapshot('snap-001', 'https://python.org', 'Python', 'Python programming language')
+        self._index_snapshot(
+            "snap-001", "https://python.org", "Python", "Python programming language"
+        )
 
-        results = search('Python')
+        results = search("Python")
         assert len(results) == 1
 
     def test_flush_single(self):
         """flush should remove snapshot from index."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Content')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'Content')
+        self._index_snapshot("snap-001", "https://example.com", "Title", "Content")
+        self._index_snapshot("snap-002", "https://example.com", "Title", "Content")
 
-        flush(['snap-001'])
+        flush(["snap-001"])
 
-        results = search('Content')
+        results = search("Content")
         assert len(results) == 1
-        assert results[0] == 'snap-002'
+        assert results[0] == "snap-002"
 
     def test_flush_multiple(self):
         """flush should remove multiple snapshots."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Test')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'Test')
-        self._index_snapshot('snap-003', 'https://example.com', 'Title', 'Test')
+        self._index_snapshot("snap-001", "https://example.com", "Title", "Test")
+        self._index_snapshot("snap-002", "https://example.com", "Title", "Test")
+        self._index_snapshot("snap-003", "https://example.com", "Title", "Test")
 
-        flush(['snap-001', 'snap-003'])
+        flush(["snap-001", "snap-003"])
 
-        results = search('Test')
+        results = search("Test")
         assert len(results) == 1
-        assert results[0] == 'snap-002'
+        assert results[0] == "snap-002"
 
     def test_flush_nonexistent(self):
         """flush should not raise for nonexistent snapshots."""
         # Should not raise
-        flush(['nonexistent-snap'])
+        flush(["nonexistent-snap"])
 
     def test_flush_no_index(self):
         """flush should not raise when index doesn't exist."""
         os.remove(self.db_path)
         # Should not raise
-        flush(['snap-001'])
+        flush(["snap-001"])
 
     def test_search_special_characters(self):
         """search should handle special characters in queries."""
-        self._index_snapshot('snap-001', 'https://example.com', 'C++ Programming', 'Learn C++ basics')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "C++ Programming", "Learn C++ basics"
+        )
 
         # FTS5 handles special chars
-        results = search('C++')
+        results = search("C++")
         # May or may not match depending on tokenizer config
         # At minimum, should not raise
         assert isinstance(results, list)
 
     def test_search_unicode(self):
         """search should handle unicode content."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Titre Francais', 'cafe resume')
-        self._index_snapshot('snap-002', 'https://example.com', 'Japanese', 'Hello world')
+        self._index_snapshot(
+            "snap-001", "https://example.com", "Titre Francais", "cafe resume"
+        )
+        self._index_snapshot(
+            "snap-002", "https://example.com", "Japanese", "Hello world"
+        )
 
         # With remove_diacritics, 'cafe' should match
-        results = search('cafe')
+        results = search("cafe")
         assert len(results) == 1
 
 
@@ -251,13 +291,13 @@ def setup_method(self, _method=None):
         self.temp_dir = tempfile.mkdtemp()
         self.db_path = Path(self.temp_dir) / SQLITEFTS_DB
 
-        self._orig_data_dir = os.environ.get('SNAP_DIR')
-        os.environ['SNAP_DIR'] = self.temp_dir
+        self._orig_data_dir = os.environ.get("SNAP_DIR")
+        os.environ["SNAP_DIR"] = self.temp_dir
 
         # Create index
         conn = sqlite3.connect(str(self.db_path))
         try:
-            conn.execute(f'''
+            conn.execute(f"""
                 CREATE VIRTUAL TABLE IF NOT EXISTS search_index
                 USING fts5(
                     snapshot_id,
@@ -266,28 +306,43 @@ def setup_method(self, _method=None):
                     content,
                     tokenize = '{FTS_TOKENIZERS}'
                 )
-            ''')
+            """)
             # Index realistic data
             test_data = [
-                ('snap-001', 'https://github.com/ArchiveBox/ArchiveBox',
-                 'ArchiveBox - Self-hosted web archiving',
-                 'Open source self-hosted web archiving. Collects, saves, and displays various types of content.'),
-                ('snap-002', 'https://docs.python.org/3/tutorial/',
-                 'Python 3 Tutorial',
-                 'An informal introduction to Python. Python is an easy to learn, powerful programming language.'),
-                ('snap-003', 'https://developer.mozilla.org/docs/Web/JavaScript',
-                 'JavaScript - MDN Web Docs',
-                 'JavaScript (JS) is a lightweight, interpreted programming language with first-class functions.'),
-                ('snap-004', 'https://news.ycombinator.com',
-                 'Hacker News',
-                 'Social news website focusing on computer science and entrepreneurship.'),
-                ('snap-005', 'https://en.wikipedia.org/wiki/Web_archiving',
-                 'Web archiving - Wikipedia',
-                 'Web archiving is the process of collecting portions of the World Wide Web to ensure the information is preserved.'),
+                (
+                    "snap-001",
+                    "https://github.com/ArchiveBox/ArchiveBox",
+                    "ArchiveBox - Self-hosted web archiving",
+                    "Open source self-hosted web archiving. Collects, saves, and displays various types of content.",
+                ),
+                (
+                    "snap-002",
+                    "https://docs.python.org/3/tutorial/",
+                    "Python 3 Tutorial",
+                    "An informal introduction to Python. Python is an easy to learn, powerful programming language.",
+                ),
+                (
+                    "snap-003",
+                    "https://developer.mozilla.org/docs/Web/JavaScript",
+                    "JavaScript - MDN Web Docs",
+                    "JavaScript (JS) is a lightweight, interpreted programming language with first-class functions.",
+                ),
+                (
+                    "snap-004",
+                    "https://news.ycombinator.com",
+                    "Hacker News",
+                    "Social news website focusing on computer science and entrepreneurship.",
+                ),
+                (
+                    "snap-005",
+                    "https://en.wikipedia.org/wiki/Web_archiving",
+                    "Web archiving - Wikipedia",
+                    "Web archiving is the process of collecting portions of the World Wide Web to ensure the information is preserved.",
+                ),
             ]
             conn.executemany(
-                'INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)',
-                test_data
+                "INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)",
+                test_data,
             )
             conn.commit()
         finally:
@@ -296,53 +351,54 @@ def setup_method(self, _method=None):
     def teardown_method(self, _method=None):
         """Clean up."""
         if self._orig_data_dir is None:
-            os.environ.pop('SNAP_DIR', None)
+            os.environ.pop("SNAP_DIR", None)
         else:
-            os.environ['SNAP_DIR'] = self._orig_data_dir
+            os.environ["SNAP_DIR"] = self._orig_data_dir
         import shutil
+
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
     def test_search_archivebox(self):
         """Search for 'archivebox' should find relevant results."""
-        results = search('archivebox')
-        assert 'snap-001' in results
+        results = search("archivebox")
+        assert "snap-001" in results
 
     def test_search_programming(self):
         """Search for 'programming' should find Python and JS docs."""
-        results = search('programming')
-        assert 'snap-002' in results
-        assert 'snap-003' in results
+        results = search("programming")
+        assert "snap-002" in results
+        assert "snap-003" in results
 
     def test_search_web_archiving(self):
         """Search for 'web archiving' should find relevant results."""
-        results = search('web archiving')
+        results = search("web archiving")
         # Both ArchiveBox and Wikipedia should match
-        assert 'snap-001' in results
-        assert 'snap-005' in results
+        assert "snap-001" in results
+        assert "snap-005" in results
 
     def test_search_github(self):
         """Search for 'github' should find URL match."""
-        results = search('github')
-        assert 'snap-001' in results
+        results = search("github")
+        assert "snap-001" in results
 
     def test_search_tutorial(self):
         """Search for 'tutorial' should find Python tutorial."""
-        results = search('tutorial')
-        assert 'snap-002' in results
+        results = search("tutorial")
+        assert "snap-002" in results
 
     def test_flush_and_search(self):
         """Flushing a snapshot should remove it from search results."""
         # Verify it's there first
-        results = search('archivebox')
-        assert 'snap-001' in results
+        results = search("archivebox")
+        assert "snap-001" in results
 
         # Flush it
-        flush(['snap-001'])
+        flush(["snap-001"])
 
         # Should no longer be found
-        results = search('archivebox')
-        assert 'snap-001' not in results
+        results = search("archivebox")
+        assert "snap-001" not in results
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/seo/tests/test_seo.py b/abx_plugins/plugins/seo/tests/test_seo.py
index 398bff5..fa31a55 100644
--- a/abx_plugins/plugins/seo/tests/test_seo.py
+++ b/abx_plugins/plugins/seo/tests/test_seo.py
@@ -1,8 +1,7 @@
 """
 Tests for the SEO plugin.
 
-Tests the real SEO hook with an actual URL to verify
-meta tag extraction.
+Tests deterministic SEO extraction via local pytest-httpserver fixtures.
 """
 
 import json
@@ -13,18 +12,47 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
 )
 
 
 # Get the path to the SEO hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-SEO_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_seo.*')
+SEO_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_seo.*")
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+
+
+@pytest.fixture
+def seo_test_url(httpserver):
+    """Serve a deterministic page with known SEO tags."""
+    httpserver.expect_request("/seo").respond_with_data(
+        """
+        <!doctype html>
+        <html lang="en">
+          <head>
+            <meta charset="utf-8" />
+            <title>Deterministic SEO Title</title>
+            <meta name="description" content="SEO fixture description" />
+            <meta name="keywords" content="archivebox,seo,fixture" />
+            <meta property="og:title" content="Deterministic OG Title" />
+            <meta property="og:description" content="Deterministic OG Description" />
+            <meta name="twitter:title" content="Deterministic Twitter Title" />
+            <link rel="canonical" href="/canonical-target" />
+          </head>
+          <body>
+            <h1>SEO Fixture</h1>
+          </body>
+        </html>
+        """.strip(),
+        content_type="text/html; charset=utf-8",
+    )
+    return httpserver.url_for("/seo")
 
 
 class TestSEOPlugin:
@@ -47,81 +75,72 @@ def teardown_method(self, _method=None):
         """Clean up."""
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
-    def test_seo_extracts_meta_tags(self, chrome_test_url):
-        """SEO hook should extract meta tags from a real URL."""
-        test_url = chrome_test_url
-        snapshot_id = 'test-seo-snapshot'
+    def test_seo_extracts_meta_tags(self, seo_test_url):
+        """SEO hook should extract known meta tags from deterministic fixture."""
+        test_url = seo_test_url
+        snapshot_id = "test-seo-snapshot"
 
         with chrome_session(
             self.temp_dir,
-            crawl_id='test-seo-crawl',
+            crawl_id="test-seo-crawl",
             snapshot_id=snapshot_id,
             test_url=test_url,
             navigate=False,
-            timeout=30,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
         ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            seo_dir = snapshot_chrome_dir.parent / 'seo'
+            seo_dir = snapshot_chrome_dir.parent / "seo"
             seo_dir.mkdir(exist_ok=True)
 
             nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(CHROME_NAVIGATE_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(snapshot_chrome_dir),
                 capture_output=True,
                 text=True,
                 timeout=120,
-                env=env
+                env=env,
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
             # Run SEO hook with the active Chrome session
             result = subprocess.run(
-                ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                [
+                    "node",
+                    str(SEO_HOOK),
+                    f"--url={test_url}",
+                    f"--snapshot-id={snapshot_id}",
+                ],
                 cwd=str(seo_dir),
                 capture_output=True,
                 text=True,
                 timeout=60,
-                env=env
+                env=env,
             )
 
             # Check for output file
-            seo_output = seo_dir / 'seo.json'
-
-            seo_data = None
-
-            # Try parsing from file first
-            if seo_output.exists():
-                with open(seo_output) as f:
-                    try:
-                        seo_data = json.load(f)
-                    except json.JSONDecodeError:
-                        pass
-
-            # Try parsing from stdout if not in file
-            if not seo_data:
-                for line in result.stdout.split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            # SEO data typically has title, description, or og: tags
-                            if any(key in record for key in ['title', 'description', 'og:title', 'canonical']):
-                                seo_data = record
-                                break
-                        except json.JSONDecodeError:
-                            continue
+            seo_output = seo_dir / "seo.json"
 
             # Verify hook ran successfully
             assert result.returncode == 0, f"Hook failed: {result.stderr}"
-            assert 'Traceback' not in result.stderr
-            assert 'Error:' not in result.stderr
-
-            # example.com has a title, so we MUST get SEO data
-            assert seo_data is not None, "No SEO data extracted from file or stdout"
-
-            # Verify we got some SEO data
-            has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta'])
-            assert has_seo_data, f"No SEO data extracted: {seo_data}"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+            assert "Traceback" not in result.stderr
+            assert "Error:" not in result.stderr
+
+            assert seo_output.exists(), "No seo.json produced"
+            seo_data = json.loads(seo_output.read_text())
+            assert seo_data["title"] == "Deterministic SEO Title"
+            assert seo_data["description"] == "SEO fixture description"
+            assert seo_data["keywords"] == "archivebox,seo,fixture"
+            assert seo_data["og:title"] == "Deterministic OG Title"
+            assert seo_data["og:description"] == "Deterministic OG Description"
+            assert seo_data["twitter:title"] == "Deterministic Twitter Title"
+            assert seo_data["canonical"] == "/canonical-target"
+            assert seo_data["language"] == "en"
+            assert seo_data["url"] == test_url
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/singlefile/on_Crawl__45_singlefile_install.py b/abx_plugins/plugins/singlefile/on_Crawl__45_singlefile_install.py
index 0400d62..f85afbe 100755
--- a/abx_plugins/plugins/singlefile/on_Crawl__45_singlefile_install.py
+++ b/abx_plugins/plugins/singlefile/on_Crawl__45_singlefile_install.py
@@ -12,55 +12,59 @@
 import os
 import sys
 from pathlib import Path
+from typing import Any
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
+def output_binary(
+    name: str, binproviders: str, overrides: dict[str, Any] | None = None
+) -> None:
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+    record: dict[str, Any] = {
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     if overrides:
-        record['overrides'] = overrides
+        record["overrides"] = overrides
     print(json.dumps(record))
 
 
 def main():
-    singlefile_enabled = get_env_bool('SINGLEFILE_ENABLED', True)
+    singlefile_enabled = get_env_bool("SINGLEFILE_ENABLED", True)
 
     if not singlefile_enabled:
         sys.exit(0)
 
     output_binary(
-        name='single-file',
-        binproviders='npm,env',
-        overrides={'npm': {'packages': ['single-file-cli']}},
+        name="single-file",
+        binproviders="npm,env",
+        overrides={"npm": {"packages": ["single-file-cli"]}},
     )
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/singlefile/on_Crawl__82_singlefile_install.js b/abx_plugins/plugins/singlefile/on_Crawl__82_singlefile_install.js
index 4d4f637..a325883 100755
--- a/abx_plugins/plugins/singlefile/on_Crawl__82_singlefile_install.js
+++ b/abx_plugins/plugins/singlefile/on_Crawl__82_singlefile_install.js
@@ -118,7 +118,7 @@ async function saveSinglefileWithExtension(page, extension, options = {}) {
     );
 
     // Output directory is current directory (hook already runs in output dir)
-    const out_path = path.join(OUTPUT_DIR, OUTPUT_FILE);
+    const out_path = options.outputPath || path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     console.error(`[singlefile] Saving via extension (${extension.id})...`);
 
diff --git a/abx_plugins/plugins/singlefile/on_Snapshot__50_singlefile.py b/abx_plugins/plugins/singlefile/on_Snapshot__50_singlefile.py
index 72726b5..8579488 100755
--- a/abx_plugins/plugins/singlefile/on_Snapshot__50_singlefile.py
+++ b/abx_plugins/plugins/singlefile/on_Snapshot__50_singlefile.py
@@ -39,27 +39,27 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'singlefile'
-BIN_NAME = 'single-file'
-BIN_PROVIDERS = 'npm,env'
+PLUGIN_NAME = "singlefile"
+BIN_NAME = "single-file"
+BIN_PROVIDERS = "npm,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-OUTPUT_FILE = 'singlefile.html'
-EXTENSION_SAVE_SCRIPT = Path(__file__).parent / 'singlefile_extension_save.js'
+OUTPUT_FILE = "singlefile.html"
+EXTENSION_SAVE_SCRIPT = Path(__file__).parent / "singlefile_extension_save.js"
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -73,7 +73,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -85,25 +85,29 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
         return default if default is not None else []
 
 
-STATICFILE_DIR = '../staticfile'
+STATICFILE_DIR = "../staticfile"
+
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
     staticfile_dir = Path(STATICFILE_DIR)
     if not staticfile_dir.exists():
         return False
-    stdout_log = staticfile_dir / 'stdout.log'
+    stdout_log = staticfile_dir / "stdout.log"
     if not stdout_log.exists():
         return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
+    for line in stdout_log.read_text(errors="ignore").splitlines():
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             record = json.loads(line)
         except json.JSONDecodeError:
             continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
+        if (
+            record.get("type") == "ArchiveResult"
+            and record.get("status") == "succeeded"
+        ):
             return True
     return False
 
@@ -111,12 +115,12 @@ def has_staticfile_output() -> bool:
 # Chrome session directory (relative to extractor output dir)
 # Note: Chrome binary is obtained via CHROME_BINARY env var, not searched for.
 # The centralized Chrome binary search is in chrome_utils.js findChromium().
-CHROME_SESSION_DIR = '../chrome'
+CHROME_SESSION_DIR = "../chrome"
 
 
 def get_cdp_url(wait_seconds: float = 0.0) -> str | None:
     """Get CDP URL from chrome plugin if available."""
-    cdp_file = Path(CHROME_SESSION_DIR) / 'cdp_url.txt'
+    cdp_file = Path(CHROME_SESSION_DIR) / "cdp_url.txt"
     deadline = time.time() + max(wait_seconds, 0.0)
     while True:
         if cdp_file.exists():
@@ -130,7 +134,8 @@ def get_cdp_url(wait_seconds: float = 0.0) -> str | None:
 def get_port_from_cdp_url(cdp_url: str) -> str | None:
     """Extract port from CDP WebSocket URL (ws://127.0.0.1:PORT/...)."""
     import re
-    match = re.search(r':(\d+)/', cdp_url)
+
+    match = re.search(r":(\d+)/", cdp_url)
     if match:
         return match.group(1)
     return None
@@ -138,7 +143,7 @@ def get_port_from_cdp_url(cdp_url: str) -> str | None:
 
 def is_cdp_server_available(cdp_remote_url: str) -> bool:
     try:
-        with urlopen(f'{cdp_remote_url}/json/version', timeout=1) as resp:
+        with urlopen(f"{cdp_remote_url}/json/version", timeout=1) as resp:
             return resp.status == 200
     except Exception:
         return False
@@ -152,14 +157,18 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
 
     Returns: (success, output_path, error_message)
     """
-    print(f'[singlefile] CLI mode start url={url}', file=sys.stderr)
+    print(f"[singlefile] CLI mode start url={url}", file=sys.stderr)
     # Get config from env (with SINGLEFILE_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
-    user_agent = get_env('SINGLEFILE_USER_AGENT') or get_env('USER_AGENT', '')
-    check_ssl = get_env_bool('SINGLEFILE_CHECK_SSL_VALIDITY', True) if get_env('SINGLEFILE_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('SINGLEFILE_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    singlefile_args = get_env_array('SINGLEFILE_ARGS', [])
-    singlefile_args_extra = get_env_array('SINGLEFILE_ARGS_EXTRA', [])
+    timeout = get_env_int("SINGLEFILE_TIMEOUT") or get_env_int("TIMEOUT", 120)
+    user_agent = get_env("SINGLEFILE_USER_AGENT") or get_env("USER_AGENT", "")
+    check_ssl = (
+        get_env_bool("SINGLEFILE_CHECK_SSL_VALIDITY", True)
+        if get_env("SINGLEFILE_CHECK_SSL_VALIDITY")
+        else get_env_bool("CHECK_SSL_VALIDITY", True)
+    )
+    cookies_file = get_env("SINGLEFILE_COOKIES_FILE") or get_env("COOKIES_FILE", "")
+    singlefile_args = get_env_array("SINGLEFILE_ARGS", [])
+    singlefile_args_extra = get_env_array("SINGLEFILE_ARGS_EXTRA", [])
     # Chrome args/binary are intentionally ignored because we require a shared Chrome session
 
     cmd = [binary, *singlefile_args]
@@ -169,12 +178,12 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
     cdp_url = get_cdp_url(wait_seconds=cdp_wait)
     cdp_remote_url = None
     if cdp_url:
-        if cdp_url.startswith(('http://', 'https://')):
+        if cdp_url.startswith(("http://", "https://")):
             cdp_remote_url = cdp_url
         else:
             port = get_port_from_cdp_url(cdp_url)
             if port:
-                cdp_remote_url = f'http://127.0.0.1:{port}'
+                cdp_remote_url = f"http://127.0.0.1:{port}"
             else:
                 cdp_remote_url = cdp_url
 
@@ -182,20 +191,23 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
         cdp_remote_url = None
 
     if cdp_remote_url:
-        print(f'[singlefile] Using existing Chrome session: {cdp_remote_url}', file=sys.stderr)
-        cmd.extend(['--browser-server', cdp_remote_url])
+        print(
+            f"[singlefile] Using existing Chrome session: {cdp_remote_url}",
+            file=sys.stderr,
+        )
+        cmd.extend(["--browser-server", cdp_remote_url])
     else:
-        return False, None, 'No Chrome session found (chrome plugin must run first)'
+        return False, None, "No Chrome session found (chrome plugin must run first)"
 
     # SSL handling
     if not check_ssl:
-        cmd.append('--browser-ignore-insecure-certs')
+        cmd.append("--browser-ignore-insecure-certs")
 
     if user_agent:
-        cmd.extend(['--user-agent', user_agent])
+        cmd.extend(["--user-agent", user_agent])
 
     if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--browser-cookies-file', cookies_file])
+        cmd.extend(["--browser-cookies-file", cookies_file])
 
     # Add extra args from config
     if singlefile_args_extra:
@@ -206,7 +218,7 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
     output_path = output_dir / OUTPUT_FILE
 
     cmd.extend([url, str(output_path)])
-    print(f'[singlefile] CLI command: {" ".join(cmd[:6])} ...', file=sys.stderr)
+    print(f"[singlefile] CLI command: {' '.join(cmd[:6])} ...", file=sys.stderr)
 
     try:
         output_lines: list[str] = []
@@ -233,69 +245,78 @@ def _read_output() -> None:
         except subprocess.TimeoutExpired:
             process.kill()
             reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
+            return False, None, f"Timed out after {timeout} seconds"
 
         reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
+        combined_output = "".join(output_lines)
 
         if output_path.exists() and output_path.stat().st_size > 0:
-            return True, str(output_path), ''
+            return True, str(output_path), ""
         else:
             stderr = combined_output
-            if 'ERR_NAME_NOT_RESOLVED' in stderr:
-                return False, None, 'DNS resolution failed'
-            if 'ERR_CONNECTION_REFUSED' in stderr:
-                return False, None, 'Connection refused'
-            detail = (stderr or '').strip()
+            if "ERR_NAME_NOT_RESOLVED" in stderr:
+                return False, None, "DNS resolution failed"
+            if "ERR_CONNECTION_REFUSED" in stderr:
+                return False, None, "Connection refused"
+            detail = (stderr or "").strip()
             if len(detail) > 2000:
                 detail = detail[:2000]
             cmd_preview = list(cmd)
-            if '--browser-args' in cmd_preview:
-                idx = cmd_preview.index('--browser-args')
+            if "--browser-args" in cmd_preview:
+                idx = cmd_preview.index("--browser-args")
                 if idx + 1 < len(cmd_preview):
-                    cmd_preview[idx + 1] = '<json>'
-            cmd_str = ' '.join(cmd_preview)
-            return False, None, f'SingleFile failed (cmd={cmd_str}): {detail}'
+                    cmd_preview[idx + 1] = "<json>"
+            cmd_str = " ".join(cmd_preview)
+            return False, None, f"SingleFile failed (cmd={cmd_str}): {detail}"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
-def save_singlefile_with_extension(url: str, timeout: int) -> tuple[bool, str | None, str]:
+def save_singlefile_with_extension(
+    url: str, timeout: int
+) -> tuple[bool, str | None, str]:
     """Save using the SingleFile Chrome extension via existing Chrome session."""
-    print(f'[singlefile] Extension mode start url={url}', file=sys.stderr)
+    print(f"[singlefile] Extension mode start url={url}", file=sys.stderr)
     # Only attempt if chrome session exists
     cdp_url = get_cdp_url(wait_seconds=min(5, max(1, timeout // 10)))
     if not cdp_url:
-        print('[singlefile] No Chrome session found (chrome plugin must run first)', file=sys.stderr)
-        return False, None, 'No Chrome session found (chrome plugin must run first)'
+        print(
+            "[singlefile] No Chrome session found (chrome plugin must run first)",
+            file=sys.stderr,
+        )
+        return False, None, "No Chrome session found (chrome plugin must run first)"
 
     if not EXTENSION_SAVE_SCRIPT.exists():
-        print(f'[singlefile] Missing helper script: {EXTENSION_SAVE_SCRIPT}', file=sys.stderr)
-        return False, None, 'SingleFile extension helper script missing'
-
-    node_binary = get_env('SINGLEFILE_NODE_BINARY') or get_env('NODE_BINARY', 'node')
-    downloads_dir = get_env('CHROME_DOWNLOADS_DIR', '')
-    extensions_dir = get_env('CHROME_EXTENSIONS_DIR', '')
-    cmd = [node_binary, str(EXTENSION_SAVE_SCRIPT), f'--url={url}']
-    print(f'[singlefile] cdp_url={cdp_url}', file=sys.stderr)
-    print(f'[singlefile] node={node_binary}', file=sys.stderr)
+        print(
+            f"[singlefile] Missing helper script: {EXTENSION_SAVE_SCRIPT}",
+            file=sys.stderr,
+        )
+        return False, None, "SingleFile extension helper script missing"
+
+    node_binary = get_env("SINGLEFILE_NODE_BINARY") or get_env("NODE_BINARY", "node")
+    downloads_dir = get_env("CHROME_DOWNLOADS_DIR", "")
+    extensions_dir = get_env("CHROME_EXTENSIONS_DIR", "")
+    cmd = [node_binary, str(EXTENSION_SAVE_SCRIPT), f"--url={url}"]
+    print(f"[singlefile] cdp_url={cdp_url}", file=sys.stderr)
+    print(f"[singlefile] node={node_binary}", file=sys.stderr)
     node_resolved = shutil.which(node_binary) if node_binary else None
-    print(f'[singlefile] node_resolved={node_resolved}', file=sys.stderr)
-    print(f'[singlefile] PATH={os.environ.get("PATH","")}', file=sys.stderr)
+    print(f"[singlefile] node_resolved={node_resolved}", file=sys.stderr)
+    print(f"[singlefile] PATH={os.environ.get('PATH', '')}", file=sys.stderr)
     if downloads_dir:
-        print(f'[singlefile] CHROME_DOWNLOADS_DIR={downloads_dir}', file=sys.stderr)
+        print(f"[singlefile] CHROME_DOWNLOADS_DIR={downloads_dir}", file=sys.stderr)
     if extensions_dir:
-        print(f'[singlefile] CHROME_EXTENSIONS_DIR={extensions_dir}', file=sys.stderr)
-    print(f'[singlefile] helper_cmd={" ".join(cmd)}', file=sys.stderr)
+        print(f"[singlefile] CHROME_EXTENSIONS_DIR={extensions_dir}", file=sys.stderr)
+    print(f"[singlefile] helper_cmd={' '.join(cmd)}", file=sys.stderr)
 
     try:
         output_lines: list[str] = []
         error_lines: list[str] = []
         process = subprocess.Popen(
             cmd,
+            cwd=str(OUTPUT_DIR),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
@@ -310,8 +331,16 @@ def _read_stream(stream, sink, label: str) -> None:
                 sys.stderr.write(line)
                 sys.stderr.flush()
 
-        stdout_thread = threading.Thread(target=_read_stream, args=(process.stdout, output_lines, 'stdout'), daemon=True)
-        stderr_thread = threading.Thread(target=_read_stream, args=(process.stderr, error_lines, 'stderr'), daemon=True)
+        stdout_thread = threading.Thread(
+            target=_read_stream,
+            args=(process.stdout, output_lines, "stdout"),
+            daemon=True,
+        )
+        stderr_thread = threading.Thread(
+            target=_read_stream,
+            args=(process.stderr, error_lines, "stderr"),
+            daemon=True,
+        )
         stdout_thread.start()
         stderr_thread.start()
 
@@ -321,87 +350,108 @@ def _read_stream(stream, sink, label: str) -> None:
             process.kill()
             stdout_thread.join(timeout=1)
             stderr_thread.join(timeout=1)
-            print(f'[singlefile] Extension helper timed out after {timeout}s', file=sys.stderr)
-            return False, None, f'Timed out after {timeout} seconds'
+            print(
+                f"[singlefile] Extension helper timed out after {timeout}s",
+                file=sys.stderr,
+            )
+            return False, None, f"Timed out after {timeout} seconds"
 
         stdout_thread.join(timeout=1)
         stderr_thread.join(timeout=1)
 
-        result_stdout = ''.join(output_lines).encode('utf-8', errors='replace')
-        result_stderr = ''.join(error_lines).encode('utf-8', errors='replace')
+        result_stdout = "".join(output_lines).encode("utf-8", errors="replace")
+        result_stderr = "".join(error_lines).encode("utf-8", errors="replace")
         result_returncode = process.returncode
     except Exception as e:
-        print(f'[singlefile] Extension helper error: {type(e).__name__}: {e}', file=sys.stderr)
-        return False, None, f'{type(e).__name__}: {e}'
+        print(
+            f"[singlefile] Extension helper error: {type(e).__name__}: {e}",
+            file=sys.stderr,
+        )
+        return False, None, f"{type(e).__name__}: {e}"
 
-    print(f'[singlefile] helper_returncode={result_returncode}', file=sys.stderr)
-    print(f'[singlefile] helper_stdout_len={len(result_stdout or b"")}', file=sys.stderr)
-    print(f'[singlefile] helper_stderr_len={len(result_stderr or b"")}', file=sys.stderr)
+    print(f"[singlefile] helper_returncode={result_returncode}", file=sys.stderr)
+    print(
+        f"[singlefile] helper_stdout_len={len(result_stdout or b'')}", file=sys.stderr
+    )
+    print(
+        f"[singlefile] helper_stderr_len={len(result_stderr or b'')}", file=sys.stderr
+    )
 
     if result_returncode == 0:
         # Prefer explicit stdout path, fallback to local output file
-        out_text = result_stdout.decode('utf-8', errors='replace').strip()
+        out_text = result_stdout.decode("utf-8", errors="replace").strip()
         if out_text and Path(out_text).exists():
-            print(f'[singlefile] Extension output: {out_text}', file=sys.stderr)
-            return True, out_text, ''
+            print(f"[singlefile] Extension output: {out_text}", file=sys.stderr)
+            return True, out_text, ""
         output_path = Path(OUTPUT_DIR) / OUTPUT_FILE
         if output_path.exists() and output_path.stat().st_size > 0:
-            print(f'[singlefile] Extension output: {output_path}', file=sys.stderr)
-            return True, str(output_path), ''
-        return False, None, 'SingleFile extension completed but no output file found'
+            print(f"[singlefile] Extension output: {output_path}", file=sys.stderr)
+            return True, str(output_path), ""
+        return False, None, "SingleFile extension completed but no output file found"
 
-    stderr = result_stderr.decode('utf-8', errors='replace').strip()
-    stdout = result_stdout.decode('utf-8', errors='replace').strip()
+    stderr = result_stderr.decode("utf-8", errors="replace").strip()
+    stdout = result_stdout.decode("utf-8", errors="replace").strip()
     detail = stderr or stdout
-    return False, None, detail or 'SingleFile extension failed'
+    return False, None, detail or "SingleFile extension failed"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to archive')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to archive")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Archive a URL using SingleFile."""
 
-    print(f'[singlefile] Hook starting pid={os.getpid()} url={url}', file=sys.stderr)
+    print(f"[singlefile] Hook starting pid={os.getpid()} url={url}", file=sys.stderr)
     output = None
-    status = 'failed'
-    error = ''
+    status = "failed"
+    error = ""
 
     try:
         # Check if SingleFile is enabled
-        if not get_env_bool('SINGLEFILE_ENABLED', True):
-            print('Skipping SingleFile (SINGLEFILE_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("SINGLEFILE_ENABLED", True):
+            print("Skipping SingleFile (SINGLEFILE_ENABLED=False)", file=sys.stderr)
             # Feature disabled - no ArchiveResult, just exit
             sys.exit(0)
 
         # Check if staticfile extractor already handled this (permanent skip)
         if has_staticfile_output():
-            print('Skipping SingleFile - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
+            print(
+                "Skipping SingleFile - staticfile extractor already downloaded this",
+                file=sys.stderr,
+            )
+            print(
+                json.dumps(
+                    {
+                        "type": "ArchiveResult",
+                        "status": "skipped",
+                        "output_str": "staticfile already exists",
+                    }
+                )
+            )
             sys.exit(0)
 
         # Prefer SingleFile extension via existing Chrome session
-        timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
+        timeout = get_env_int("SINGLEFILE_TIMEOUT") or get_env_int("TIMEOUT", 120)
         success, output, error = save_singlefile_with_extension(url, timeout)
-        status = 'succeeded' if success else 'failed'
+        status = "succeeded" if success else "failed"
 
     except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
+        error = f"{type(e).__name__}: {e}"
+        status = "failed"
 
     if error:
-        print(f'ERROR: {error}', file=sys.stderr)
+        print(f"ERROR: {error}", file=sys.stderr)
 
     # Output clean JSONL (no RESULT_JSON= prefix)
     result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
+        "type": "ArchiveResult",
+        "status": status,
+        "output_str": output or error or "",
     }
     print(json.dumps(result))
 
-    sys.exit(0 if status == 'succeeded' else 1)
+    sys.exit(0 if status == "succeeded" else 1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/singlefile/singlefile_extension_save.js b/abx_plugins/plugins/singlefile/singlefile_extension_save.js
index 6af5eee..9b5dd09 100644
--- a/abx_plugins/plugins/singlefile/singlefile_extension_save.js
+++ b/abx_plugins/plugins/singlefile/singlefile_extension_save.js
@@ -10,7 +10,8 @@ const fs = require('fs');
 const path = require('path');
 const os = require('os');
 
-const CHROME_SESSION_DIR = '../chrome';
+const SNAPSHOT_OUTPUT_DIR = process.cwd();
+const CHROME_SESSION_DIR = path.resolve(SNAPSHOT_OUTPUT_DIR, '..', 'chrome');
 const DOWNLOADS_DIR = process.env.CHROME_DOWNLOADS_DIR ||
     path.join(process.env.PERSONAS_DIR || path.join(os.homedir(), '.config', 'abx', 'personas'),
         process.env.ACTIVE_PERSONA || 'Default',
@@ -73,6 +74,9 @@ async function main() {
             EXTENSION,
             saveSinglefileWithExtension,
         } = require('./on_Crawl__82_singlefile_install.js');
+        if (process.cwd() !== SNAPSHOT_OUTPUT_DIR) {
+            process.chdir(SNAPSHOT_OUTPUT_DIR);
+        }
         console.error('[singlefile] dependencies loaded');
 
         // Ensure extension is installed and metadata is cached
@@ -85,24 +89,30 @@ async function main() {
             console.error('[❌] SingleFile extension not installed');
             process.exit(2);
         }
-        if (extension.unpacked_path) {
-            const runtimeId = chromeUtils.getExtensionId(extension.unpacked_path);
-            if (runtimeId) {
-                extension.id = runtimeId;
-            }
-        }
-        console.error(`[singlefile] extension ready id=${extension.id} version=${extension.version}`);
+        console.error(`[singlefile] extension cache ready name=${extension.name} version=${extension.version}`);
 
         // Connect to existing Chrome session
         console.error('[singlefile] connecting to chrome session...');
         const { browser, page } = await chromeUtils.connectToPage({
             chromeSessionDir: CHROME_SESSION_DIR,
             timeoutMs: 60000,
+            requireTargetId: true,
             puppeteer,
+            puppeteerModule: puppeteer,
         });
         console.error('[singlefile] connected to chrome');
 
         try {
+            const currentUrl = await page.url();
+            const norm = (value) => (value || '').replace(/\/+$/, '');
+            if (!currentUrl || currentUrl.startsWith('about:') || norm(currentUrl) !== norm(url)) {
+                console.error(`[singlefile] navigating page from ${currentUrl || '<empty>'} to ${url}`);
+                await page.goto(url, {
+                    waitUntil: 'networkidle2',
+                    timeout: 60000,
+                });
+            }
+
             // Ensure CDP target discovery is enabled so service_worker targets appear
             try {
                 const client = await page.createCDPSession();
@@ -112,71 +122,23 @@ async function main() {
                 console.error(`[singlefile] failed to enable target discovery: ${err.message || err}`);
             }
 
-            // Wait for extension target to be available, then attach dispatchAction
-            console.error('[singlefile] waiting for extension target...');
-            const deadline = Date.now() + 30000;
-            let matchTarget = null;
-            let matchInfo = null;
-            let lastLog = 0;
-            const wantedName = (extension.name || 'singlefile').toLowerCase();
-
-            while (Date.now() < deadline && !matchTarget) {
-                const targets = browser.targets();
-                for (const target of targets) {
-                    const info = await chromeUtils.isTargetExtension(target);
-                    if (!info?.target_is_extension || !info?.extension_id) {
-                        continue;
-                    }
-                    const manifestName = (info.manifest_name || '').toLowerCase();
-                    const targetUrl = (info.target_url || '').toLowerCase();
-                    const nameMatches = manifestName.includes(wantedName) || manifestName.includes('singlefile') || manifestName.includes('single-file');
-                    const urlMatches = targetUrl.includes('singlefile') || targetUrl.includes('single-file') || targetUrl.includes('single-file-extension');
-                    if (nameMatches || urlMatches) {
-                        matchTarget = target;
-                        matchInfo = info;
-                        break;
-                    }
-                }
-
-                if (!matchTarget) {
-                    if (Date.now() - lastLog > 5000) {
-                        const targetsSummary = [];
-                        for (const target of targets) {
-                            const info = await chromeUtils.isTargetExtension(target);
-                            if (!info?.target_is_extension) {
-                                continue;
-                            }
-                            targetsSummary.push({
-                                type: info.target_type,
-                                url: info.target_url,
-                                extensionId: info.extension_id,
-                                manifestName: info.manifest_name,
-                            });
-                        }
-                        console.error(`[singlefile] waiting... targets total=${targets.length} extensions=${targetsSummary.length} details=${JSON.stringify(targetsSummary)}`);
-                        lastLog = Date.now();
-                    }
-                    await new Promise(r => setTimeout(r, 500));
-                }
-            }
-
-            if (!matchTarget || !matchInfo) {
-                const targets = chromeUtils.getExtensionTargets(browser);
-                console.error(`[singlefile] extension target not found (name=${extension.name})`);
-                console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
+            // Resolve extension id from snapshot chrome session metadata and connect to target by id.
+            console.error('[singlefile] waiting for extensions metadata...');
+            const sessionExtensions = await chromeUtils.waitForExtensionsMetadata(CHROME_SESSION_DIR, 15000);
+            const sessionEntry = chromeUtils.findExtensionMetadataByName(sessionExtensions, extension.name);
+            if (!sessionEntry || !sessionEntry.id) {
+                console.error(`[singlefile] extension metadata missing id for name=${extension.name}`);
                 await browser.disconnect();
                 process.exit(5);
             }
+            extension.id = sessionEntry.id;
+            console.error(`[singlefile] resolved extension id from session metadata: ${extension.id}`);
 
-            // Use the runtime extension id from the matched target
-            extension.id = matchInfo.extension_id;
-
+            const extensionTarget = await chromeUtils.waitForExtensionTargetHandle(browser, extension.id, 30000);
             console.error('[singlefile] loading extension from target...');
-            await chromeUtils.loadExtensionFromTarget([extension], matchTarget);
+            await chromeUtils.loadExtensionFromTarget([extension], extensionTarget);
             if (typeof extension.dispatchAction !== 'function') {
-                const targets = chromeUtils.getExtensionTargets(browser);
                 console.error(`[singlefile] extension dispatchAction missing for id=${extension.id}`);
-                console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
                 await browser.disconnect();
                 process.exit(6);
             }
@@ -184,7 +146,10 @@ async function main() {
             await setDownloadDir(page, DOWNLOADS_DIR);
 
             console.error('[singlefile] triggering save via extension...');
-            const output = await saveSinglefileWithExtension(page, extension, { downloadsDir: DOWNLOADS_DIR });
+            const output = await saveSinglefileWithExtension(page, extension, {
+                downloadsDir: DOWNLOADS_DIR,
+                outputPath: path.join(SNAPSHOT_OUTPUT_DIR, 'singlefile.html'),
+            });
             if (output && fs.existsSync(output)) {
                 console.error(`[singlefile] saved: ${output}`);
                 console.log(output);
diff --git a/abx_plugins/plugins/singlefile/tests/test_singlefile.py b/abx_plugins/plugins/singlefile/tests/test_singlefile.py
index 232509b..0eef926 100644
--- a/abx_plugins/plugins/singlefile/tests/test_singlefile.py
+++ b/abx_plugins/plugins/singlefile/tests/test_singlefile.py
@@ -10,8 +10,8 @@
 6. Works with extensions loaded (ublock, etc.)
 """
 
-import json
 import os
+import json
 import subprocess
 import sys
 import tempfile
@@ -19,43 +19,139 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_test_env,
     get_plugin_dir,
     get_hook_script,
     chrome_session,
-    cleanup_chrome,
 )
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_singlefile.py')
-INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__82_singlefile_install.js'
+_SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_singlefile.py")
+if _SNAPSHOT_HOOK is None:
+    raise FileNotFoundError(f"Snapshot hook not found in {PLUGIN_DIR}")
+SNAPSHOT_HOOK = _SNAPSHOT_HOOK
+INSTALL_SCRIPT = PLUGIN_DIR / "on_Crawl__82_singlefile_install.js"
 TEST_URL = "https://example.com"
 
+# Module-level cache for extension install location
+_singlefile_install_root = None
+_singlefile_install_state = None
+
+
+def ensure_singlefile_extension_installed() -> dict[str, Path]:
+    """Install SingleFile extension via crawl hook and return resolved paths."""
+    global _singlefile_install_state
+    if _singlefile_install_state:
+        cache_file = _singlefile_install_state["cache_file"]
+        if cache_file.exists():
+            try:
+                payload = json.loads(cache_file.read_text())
+                unpacked_path = Path(payload.get("unpacked_path", ""))
+                if (
+                    unpacked_path.exists()
+                    and (unpacked_path / "manifest.json").exists()
+                ):
+                    return _singlefile_install_state
+            except Exception:
+                pass
+
+    global _singlefile_install_root
+    if not _singlefile_install_root:
+        _singlefile_install_root = tempfile.mkdtemp(prefix="singlefile-ext-")
+
+    install_root = Path(_singlefile_install_root)
+    snap_dir = install_root / "snap"
+    crawl_dir = install_root / "crawl"
+    personas_dir = install_root / "personas"
+    extensions_dir = personas_dir / "Default" / "chrome_extensions"
+    downloads_dir = personas_dir / "Default" / "chrome_downloads"
+    user_data_dir = personas_dir / "Default" / "chrome_user_data"
+
+    extensions_dir.mkdir(parents=True, exist_ok=True)
+    downloads_dir.mkdir(parents=True, exist_ok=True)
+    user_data_dir.mkdir(parents=True, exist_ok=True)
+    snap_dir.mkdir(parents=True, exist_ok=True)
+    crawl_dir.mkdir(parents=True, exist_ok=True)
+
+    env_install = os.environ.copy()
+    env_install.update(
+        {
+            "SNAP_DIR": str(snap_dir),
+            "CRAWL_DIR": str(crawl_dir),
+            "PERSONAS_DIR": str(personas_dir),
+            "CHROME_EXTENSIONS_DIR": str(extensions_dir),
+            "CHROME_DOWNLOADS_DIR": str(downloads_dir),
+            "CHROME_USER_DATA_DIR": str(user_data_dir),
+        }
+    )
+
+    result = subprocess.run(
+        ["node", str(INSTALL_SCRIPT)],
+        capture_output=True,
+        text=True,
+        env=env_install,
+        timeout=180,
+    )
+    assert result.returncode == 0, (
+        f"SingleFile extension install hook failed: {result.stderr}\nstdout: {result.stdout}"
+    )
+
+    cache_file = extensions_dir / "singlefile.extension.json"
+    assert cache_file.exists(), f"Extension cache file not created: {cache_file}"
+
+    payload = json.loads(cache_file.read_text())
+    unpacked_path = Path(payload.get("unpacked_path", ""))
+    assert unpacked_path.exists(), f"Unpacked extension path missing: {unpacked_path}"
+    assert (unpacked_path / "manifest.json").exists(), (
+        f"Extension manifest missing: {unpacked_path / 'manifest.json'}"
+    )
+
+    _singlefile_install_state = {
+        "install_root": install_root,
+        "snap_dir": snap_dir,
+        "crawl_dir": crawl_dir,
+        "personas_dir": personas_dir,
+        "extensions_dir": extensions_dir,
+        "downloads_dir": downloads_dir,
+        "user_data_dir": user_data_dir,
+        "cache_file": cache_file,
+        "unpacked_path": unpacked_path,
+    }
+    return _singlefile_install_state
+
 
 def test_snapshot_hook_exists():
     """Verify snapshot extraction hook exists"""
-    assert SNAPSHOT_HOOK is not None and SNAPSHOT_HOOK.exists(), f"Snapshot hook not found in {PLUGIN_DIR}"
+    assert SNAPSHOT_HOOK is not None and SNAPSHOT_HOOK.exists(), (
+        f"Snapshot hook not found in {PLUGIN_DIR}"
+    )
 
 
 def test_snapshot_hook_priority():
     """Test that snapshot hook has correct priority (50)"""
     filename = SNAPSHOT_HOOK.name
     assert "50" in filename, "SingleFile snapshot hook should have priority 50"
-    assert filename.startswith("on_Snapshot__50_"), "Should follow priority naming convention"
+    assert filename.startswith("on_Snapshot__50_"), (
+        "Should follow priority naming convention"
+    )
 
 
 def test_verify_deps_with_abx_pkg():
     """Verify dependencies are available via abx-pkg."""
     from abx_pkg import Binary, EnvProvider
 
-    EnvProvider.model_rebuild()
-
     # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_binary = Binary(name="node", binproviders=[EnvProvider()])
     node_loaded = node_binary.load()
     assert node_loaded and node_loaded.abspath, "Node.js required for singlefile plugin"
+    state = ensure_singlefile_extension_installed()
+    assert state["cache_file"].exists(), (
+        "SingleFile extension cache should be installed"
+    )
 
 
 def test_singlefile_cli_archives_example_com():
@@ -63,26 +159,28 @@ def test_singlefile_cli_archives_example_com():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        snap_dir = tmpdir / 'snap'
-        personas_dir = tmpdir / 'personas'
-        extensions_dir = personas_dir / 'Default' / 'chrome_extensions'
-        downloads_dir = personas_dir / 'Default' / 'chrome_downloads'
-        user_data_dir = personas_dir / 'Default' / 'chrome_user_data'
+        snap_dir = tmpdir / "snap"
+        personas_dir = tmpdir / "personas"
+        extensions_dir = personas_dir / "Default" / "chrome_extensions"
+        downloads_dir = personas_dir / "Default" / "chrome_downloads"
+        user_data_dir = personas_dir / "Default" / "chrome_user_data"
         extensions_dir.mkdir(parents=True, exist_ok=True)
         downloads_dir.mkdir(parents=True, exist_ok=True)
         snap_dir.mkdir(parents=True, exist_ok=True)
         user_data_dir.mkdir(parents=True, exist_ok=True)
 
         env_install = os.environ.copy()
-        env_install.update({
-            'SNAP_DIR': str(snap_dir),
-            'PERSONAS_DIR': str(personas_dir),
-            'CHROME_EXTENSIONS_DIR': str(extensions_dir),
-            'CHROME_DOWNLOADS_DIR': str(downloads_dir),
-        })
+        env_install.update(
+            {
+                "SNAP_DIR": str(snap_dir),
+                "PERSONAS_DIR": str(personas_dir),
+                "CHROME_EXTENSIONS_DIR": str(extensions_dir),
+                "CHROME_DOWNLOADS_DIR": str(downloads_dir),
+            }
+        )
 
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             capture_output=True,
             text=True,
             env=env_install,
@@ -91,28 +189,33 @@ def test_singlefile_cli_archives_example_com():
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
         old_env = os.environ.copy()
-        os.environ['CHROME_USER_DATA_DIR'] = str(user_data_dir)
-        os.environ['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-        os.environ['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
+        os.environ["CHROME_USER_DATA_DIR"] = str(user_data_dir)
+        os.environ["CHROME_DOWNLOADS_DIR"] = str(downloads_dir)
+        os.environ["CHROME_EXTENSIONS_DIR"] = str(extensions_dir)
         try:
             with chrome_session(
                 tmpdir=tmpdir,
-                crawl_id='singlefile-cli-crawl',
-                snapshot_id='singlefile-cli-snap',
+                crawl_id="singlefile-cli-crawl",
+                snapshot_id="singlefile-cli-snap",
                 test_url=TEST_URL,
                 navigate=True,
                 timeout=30,
             ) as (_chrome_proc, _chrome_pid, snapshot_chrome_dir, env):
-                env['SINGLEFILE_ENABLED'] = 'true'
-                env['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-                env['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
+                env["SINGLEFILE_ENABLED"] = "true"
+                env["CHROME_EXTENSIONS_DIR"] = str(extensions_dir)
+                env["CHROME_DOWNLOADS_DIR"] = str(downloads_dir)
 
-                singlefile_output_dir = snapshot_chrome_dir.parent / 'singlefile'
+                singlefile_output_dir = snapshot_chrome_dir.parent / "singlefile"
                 singlefile_output_dir.mkdir(parents=True, exist_ok=True)
 
                 # Run singlefile snapshot hook
                 result = subprocess.run(
-                    [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+                    [
+                        sys.executable,
+                        str(SNAPSHOT_HOOK),
+                        f"--url={TEST_URL}",
+                        "--snapshot-id=test789",
+                    ],
                     cwd=singlefile_output_dir,
                     capture_output=True,
                     text=True,
@@ -126,14 +229,20 @@ def test_singlefile_cli_archives_example_com():
         assert result.returncode == 0, f"Hook execution failed: {result.stderr}"
 
         # Verify output file exists
-        output_file = singlefile_output_dir / 'singlefile.html'
-        assert output_file.exists(), f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
+        output_file = singlefile_output_dir / "singlefile.html"
+        assert output_file.exists(), (
+            f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
+        )
 
         # Verify it contains real HTML
         html_content = output_file.read_text()
         assert len(html_content) > 500, "Output file too small to be valid HTML"
-        assert '<!DOCTYPE html>' in html_content or '<html' in html_content, "Output should contain HTML doctype or html tag"
-        assert 'Example Domain' in html_content, "Output should contain example.com content"
+        assert "<!DOCTYPE html>" in html_content or "<html" in html_content, (
+            "Output should contain HTML doctype or html tag"
+        )
+        assert "Example Domain" in html_content, (
+            "Output should contain example.com content"
+        )
 
 
 def test_singlefile_with_chrome_session():
@@ -142,46 +251,72 @@ def test_singlefile_with_chrome_session():
     When a Chrome session exists (chrome/cdp_url.txt), singlefile should
     connect to it instead of launching a new Chrome instance.
     """
+    install_state = ensure_singlefile_extension_installed()
+
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        # Set up Chrome session using shared helper
-        with chrome_session(
-            tmpdir=tmpdir,
-            crawl_id='singlefile-test-crawl',
-            snapshot_id='singlefile-test-snap',
-            test_url=TEST_URL,
-            navigate=False,  # Don't navigate, singlefile will do that
-            timeout=20,
-        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-            snap_dir = Path(env['SNAP_DIR'])
-            singlefile_output_dir = snap_dir / 'singlefile'
-            singlefile_output_dir.mkdir(parents=True, exist_ok=True)
-
-            # Use env from chrome_session
-            env['SINGLEFILE_ENABLED'] = 'true'
-
-            # Run singlefile - it should find and use the existing Chrome session
-            result = subprocess.run(
-                [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'],
-                cwd=str(singlefile_output_dir),
-                capture_output=True,
-                text=True,
-                env=env,
-                timeout=120
-            )
-
-            # Verify output
-            output_file = singlefile_output_dir / 'singlefile.html'
-            if output_file.exists():
-                html_content = output_file.read_text()
-                assert len(html_content) > 500, "Output file too small"
-                assert 'Example Domain' in html_content, "Should contain example.com content"
-            else:
-                # If singlefile couldn't connect to Chrome, it may have failed
-                # Check if it mentioned browser-server in its args (indicating it tried to use CDP)
-                assert result.returncode == 0 or 'browser-server' in result.stderr or 'cdp' in result.stderr.lower(), \
-                    f"Singlefile should attempt CDP connection. stderr: {result.stderr}"
+        old_env = os.environ.copy()
+        os.environ["PERSONAS_DIR"] = str(install_state["personas_dir"])
+        os.environ["CHROME_EXTENSIONS_DIR"] = str(install_state["extensions_dir"])
+        os.environ["CHROME_DOWNLOADS_DIR"] = str(install_state["downloads_dir"])
+        os.environ["CHROME_USER_DATA_DIR"] = str(install_state["user_data_dir"])
+        try:
+            # Set up Chrome session using shared helper
+            with chrome_session(
+                tmpdir=tmpdir,
+                crawl_id="singlefile-test-crawl",
+                snapshot_id="singlefile-test-snap",
+                test_url=TEST_URL,
+                navigate=False,  # Don't navigate, singlefile will do that
+                timeout=20,
+            ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
+                snap_dir = Path(env["SNAP_DIR"])
+                singlefile_output_dir = snap_dir / "singlefile"
+                singlefile_output_dir.mkdir(parents=True, exist_ok=True)
+
+                # Use env from chrome_session
+                env["SINGLEFILE_ENABLED"] = "true"
+                env["CHROME_EXTENSIONS_DIR"] = str(install_state["extensions_dir"])
+                env["CHROME_DOWNLOADS_DIR"] = str(install_state["downloads_dir"])
+                env["CHROME_USER_DATA_DIR"] = str(install_state["user_data_dir"])
+
+                # Run singlefile - it should find and use the existing Chrome session
+                result = subprocess.run(
+                    [
+                        sys.executable,
+                        str(SNAPSHOT_HOOK),
+                        f"--url={TEST_URL}",
+                        "--snapshot-id=singlefile-test-snap",
+                    ],
+                    cwd=str(singlefile_output_dir),
+                    capture_output=True,
+                    text=True,
+                    env=env,
+                    timeout=120,
+                )
+
+                # Verify output
+                output_file = singlefile_output_dir / "singlefile.html"
+                if output_file.exists():
+                    html_content = output_file.read_text()
+                    assert len(html_content) > 500, "Output file too small"
+                    assert "Example Domain" in html_content, (
+                        "Should contain example.com content"
+                    )
+                else:
+                    # If singlefile couldn't connect to Chrome, it may have failed
+                    # Check if it mentioned browser-server in its args (indicating it tried to use CDP)
+                    assert (
+                        result.returncode == 0
+                        or "browser-server" in result.stderr
+                        or "cdp" in result.stderr.lower()
+                    ), (
+                        f"Singlefile should attempt CDP connection. stderr: {result.stderr}"
+                    )
+        finally:
+            os.environ.clear()
+            os.environ.update(old_env)
 
 
 def test_singlefile_with_extension_uses_existing_chrome():
@@ -189,88 +324,108 @@ def test_singlefile_with_extension_uses_existing_chrome():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        snap_dir = tmpdir / 'snap'
-        personas_dir = tmpdir / 'personas'
-        extensions_dir = personas_dir / 'Default' / 'chrome_extensions'
-        downloads_dir = personas_dir / 'Default' / 'chrome_downloads'
-        user_data_dir = personas_dir / 'Default' / 'chrome_user_data'
+        snap_dir = tmpdir / "snap"
+        personas_dir = tmpdir / "personas"
+        extensions_dir = personas_dir / "Default" / "chrome_extensions"
+        downloads_dir = personas_dir / "Default" / "chrome_downloads"
+        user_data_dir = personas_dir / "Default" / "chrome_user_data"
         extensions_dir.mkdir(parents=True, exist_ok=True)
         downloads_dir.mkdir(parents=True, exist_ok=True)
         snap_dir.mkdir(parents=True, exist_ok=True)
         user_data_dir.mkdir(parents=True, exist_ok=True)
 
         env_install = os.environ.copy()
-        env_install.update({
-            'SNAP_DIR': str(snap_dir),
-            'PERSONAS_DIR': str(personas_dir),
-            'CHROME_EXTENSIONS_DIR': str(extensions_dir),
-            'CHROME_DOWNLOADS_DIR': str(downloads_dir),
-        })
+        env_install.update(
+            {
+                "SNAP_DIR": str(snap_dir),
+                "PERSONAS_DIR": str(personas_dir),
+                "CHROME_EXTENSIONS_DIR": str(extensions_dir),
+                "CHROME_DOWNLOADS_DIR": str(downloads_dir),
+            }
+        )
 
         # Install SingleFile extension cache before launching Chrome
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             capture_output=True,
             text=True,
             env=env_install,
-            timeout=120
+            timeout=120,
         )
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
         # Launch Chrome session with extensions loaded
         old_env = os.environ.copy()
-        os.environ['CHROME_USER_DATA_DIR'] = str(user_data_dir)
-        os.environ['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-        os.environ['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
+        os.environ["CHROME_USER_DATA_DIR"] = str(user_data_dir)
+        os.environ["CHROME_DOWNLOADS_DIR"] = str(downloads_dir)
+        os.environ["CHROME_EXTENSIONS_DIR"] = str(extensions_dir)
         try:
             with chrome_session(
                 tmpdir=tmpdir,
-                crawl_id='singlefile-ext-crawl',
-                snapshot_id='singlefile-ext-snap',
+                crawl_id="singlefile-ext-crawl",
+                snapshot_id="singlefile-ext-snap",
                 test_url=TEST_URL,
                 navigate=True,
                 timeout=30,
             ) as (_chrome_proc, _chrome_pid, snapshot_chrome_dir, env):
-                singlefile_output_dir = tmpdir / 'snapshot' / 'singlefile'
+                singlefile_output_dir = snapshot_chrome_dir.parent / "singlefile"
                 singlefile_output_dir.mkdir(parents=True, exist_ok=True)
 
                 # Ensure ../chrome points to snapshot chrome session (contains target_id.txt)
-                chrome_dir = singlefile_output_dir.parent / 'chrome'
+                chrome_dir = singlefile_output_dir.parent / "chrome"
                 if not chrome_dir.exists():
                     chrome_dir.symlink_to(snapshot_chrome_dir)
 
-                env['SINGLEFILE_ENABLED'] = 'true'
-                env['SINGLEFILE_BINARY'] = '/nonexistent/single-file'  # force extension path
-                env['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-                env['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-                env['CHROME_HEADLESS'] = 'false'
+                env["SINGLEFILE_ENABLED"] = "true"
+                env["SINGLEFILE_BINARY"] = (
+                    "/nonexistent/single-file"  # force extension path
+                )
+                env["CHROME_EXTENSIONS_DIR"] = str(extensions_dir)
+                env["CHROME_DOWNLOADS_DIR"] = str(downloads_dir)
+                env["CHROME_HEADLESS"] = "false"
+                env.pop("CRAWL_DIR", None)
 
                 # Track downloads dir state before run to ensure file is created then moved out
-                downloads_before = set(downloads_dir.glob('*.html'))
+                downloads_before = set(downloads_dir.glob("*.html"))
                 downloads_mtime_before = downloads_dir.stat().st_mtime_ns
 
                 result = subprocess.run(
-                    [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-ext-snap'],
+                    [
+                        sys.executable,
+                        str(SNAPSHOT_HOOK),
+                        f"--url={TEST_URL}",
+                        "--snapshot-id=singlefile-ext-snap",
+                    ],
                     cwd=str(singlefile_output_dir),
                     capture_output=True,
                     text=True,
                     env=env,
-                    timeout=120
+                    timeout=120,
                 )
 
-                assert result.returncode == 0, f"SingleFile extension run failed: {result.stderr}"
+                assert result.returncode == 0, (
+                    f"SingleFile extension run failed: {result.stderr}"
+                )
 
-                output_file = singlefile_output_dir / 'singlefile.html'
-                assert output_file.exists(), f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
-                html_content = output_file.read_text(errors='ignore')
-                assert 'Example Domain' in html_content, "Output should contain example.com content"
+                output_file = singlefile_output_dir / "singlefile.html"
+                assert output_file.exists(), (
+                    f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
+                )
+                html_content = output_file.read_text(errors="ignore")
+                assert "Example Domain" in html_content, (
+                    "Output should contain example.com content"
+                )
 
                 # Verify download moved out of downloads dir
-                downloads_after = set(downloads_dir.glob('*.html'))
+                downloads_after = set(downloads_dir.glob("*.html"))
                 new_downloads = downloads_after - downloads_before
                 downloads_mtime_after = downloads_dir.stat().st_mtime_ns
-                assert downloads_mtime_after != downloads_mtime_before, "Downloads dir should be modified during extension save"
-                assert not new_downloads, f"SingleFile download should be moved out of downloads dir, found: {new_downloads}"
+                assert downloads_mtime_after != downloads_mtime_before, (
+                    "Downloads dir should be modified during extension save"
+                )
+                assert not new_downloads, (
+                    f"SingleFile download should be moved out of downloads dir, found: {new_downloads}"
+                )
         finally:
             os.environ.clear()
             os.environ.update(old_env)
@@ -282,23 +437,34 @@ def test_singlefile_disabled_skips():
         tmpdir = Path(tmpdir)
 
         env = get_test_env()
-        env['SINGLEFILE_ENABLED'] = 'False'
+        env["SINGLEFILE_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
+            [
+                sys.executable,
+                str(SNAPSHOT_HOOK),
+                f"--url={TEST_URL}",
+                "--snapshot-id=test-disabled",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         assert result.returncode == 0, f"Should exit 0 when disabled: {result.stderr}"
 
         # Should NOT emit JSONL when disabled
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when disabled, but got: {jsonl_lines}"
+        )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/ssl/tests/test_ssl.py b/abx_plugins/plugins/ssl/tests/test_ssl.py
index b67c338..9f3d6a2 100644
--- a/abx_plugins/plugins/ssl/tests/test_ssl.py
+++ b/abx_plugins/plugins/ssl/tests/test_ssl.py
@@ -15,18 +15,19 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     chrome_session,
     CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_https_url,
 )
 
 
 # Get the path to the SSL hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-SSL_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_ssl.*')
+SSL_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_ssl.*")
 
 
 class TestSSLPlugin:
@@ -52,44 +53,56 @@ def teardown_method(self, _method=None):
     def test_ssl_extracts_certificate_from_https_url(self, chrome_test_https_url):
         """SSL hook should extract certificate info from a real HTTPS URL."""
         test_url = chrome_test_https_url
-        snapshot_id = 'test-ssl-snapshot'
+        snapshot_id = "test-ssl-snapshot"
 
-        old_ssl_setting = os.environ.get('CHROME_CHECK_SSL_VALIDITY')
-        os.environ['CHROME_CHECK_SSL_VALIDITY'] = 'false'
+        old_ssl_setting = os.environ.get("CHROME_CHECK_SSL_VALIDITY")
+        os.environ["CHROME_CHECK_SSL_VALIDITY"] = "false"
         try:
             with chrome_session(
                 self.temp_dir,
-                crawl_id='test-ssl-crawl',
+                crawl_id="test-ssl-crawl",
                 snapshot_id=snapshot_id,
                 test_url=test_url,
                 navigate=False,
                 timeout=30,
             ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                ssl_dir = snapshot_chrome_dir.parent / 'ssl'
+                ssl_dir = snapshot_chrome_dir.parent / "ssl"
                 ssl_dir.mkdir(exist_ok=True)
 
                 # Run SSL hook with the active Chrome session (background hook)
                 result = subprocess.Popen(
-                    ['node', str(SSL_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(SSL_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(ssl_dir),
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
-                    env=env
+                    env=env,
                 )
 
                 nav_result = subprocess.run(
-                    ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                    [
+                        "node",
+                        str(CHROME_NAVIGATE_HOOK),
+                        f"--url={test_url}",
+                        f"--snapshot-id={snapshot_id}",
+                    ],
                     cwd=str(snapshot_chrome_dir),
                     capture_output=True,
                     text=True,
                     timeout=120,
-                    env=env
+                    env=env,
+                )
+                assert nav_result.returncode == 0, (
+                    f"Navigation failed: {nav_result.stderr}"
                 )
-                assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
                 # Check for output file
-                ssl_output = ssl_dir / 'ssl.jsonl'
+                ssl_output = ssl_dir / "ssl.jsonl"
                 for _ in range(30):
                     if ssl_output.exists() and ssl_output.stat().st_size > 0:
                         break
@@ -111,7 +124,7 @@ def test_ssl_extracts_certificate_from_https_url(self, chrome_test_https_url):
                 if ssl_output.exists():
                     with open(ssl_output) as f:
                         content = f.read().strip()
-                        if content.startswith('{'):
+                        if content.startswith("{"):
                             try:
                                 ssl_data = json.loads(content)
                             except json.JSONDecodeError:
@@ -119,35 +132,39 @@ def test_ssl_extracts_certificate_from_https_url(self, chrome_test_https_url):
 
                 # Try parsing from stdout if not in file
                 if not ssl_data:
-                    for line in stdout.split('\n'):
+                    for line in stdout.split("\n"):
                         line = line.strip()
-                        if line.startswith('{'):
+                        if line.startswith("{"):
                             try:
                                 record = json.loads(line)
-                                if 'protocol' in record or 'issuer' in record or record.get('type') == 'SSL':
+                                if (
+                                    "protocol" in record
+                                    or "issuer" in record
+                                    or record.get("type") == "SSL"
+                                ):
                                     ssl_data = record
                                     break
                             except json.JSONDecodeError:
                                 continue
 
                 # Verify hook ran successfully
-                assert 'Traceback' not in stderr
-                assert 'Error:' not in stderr
+                assert "Traceback" not in stderr
+                assert "Error:" not in stderr
 
                 # HTTPS fixture page must produce SSL metadata.
                 assert ssl_data is not None, "No SSL data extracted from HTTPS URL"
 
                 # Verify we got certificate info
-                assert 'protocol' in ssl_data, f"SSL data missing protocol: {ssl_data}"
-                assert ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'), (
-                    f"Unexpected protocol: {ssl_data['protocol']}"
-                )
+                assert "protocol" in ssl_data, f"SSL data missing protocol: {ssl_data}"
+                assert ssl_data["protocol"].startswith("TLS") or ssl_data[
+                    "protocol"
+                ].startswith("SSL"), f"Unexpected protocol: {ssl_data['protocol']}"
         finally:
             if old_ssl_setting is None:
-                os.environ.pop('CHROME_CHECK_SSL_VALIDITY', None)
+                os.environ.pop("CHROME_CHECK_SSL_VALIDITY", None)
             else:
-                os.environ['CHROME_CHECK_SSL_VALIDITY'] = old_ssl_setting
+                os.environ["CHROME_CHECK_SSL_VALIDITY"] = old_ssl_setting
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/staticfile/tests/test_staticfile.py b/abx_plugins/plugins/staticfile/tests/test_staticfile.py
index 18fc7c4..3f66478 100644
--- a/abx_plugins/plugins/staticfile/tests/test_staticfile.py
+++ b/abx_plugins/plugins/staticfile/tests/test_staticfile.py
@@ -1,39 +1,106 @@
 """
 Tests for the staticfile plugin.
 
-Tests the real staticfile hook with actual URLs to verify
-static file detection and download.
+Tests the real staticfile hook using deterministic local fixtures.
 """
 
-import json
-import shutil
 import subprocess
+import shutil
 import tempfile
 import time
 from pathlib import Path
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
-    chrome_session,
-    get_test_env,
+    CHROME_NAVIGATE_HOOK,
     get_plugin_dir,
     get_hook_script,
-    chrome_test_url,
+    parse_jsonl_output,
+    chrome_session,
 )
 
 
-def chrome_available() -> bool:
-    """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
-        if shutil.which(name):
-            return True
-    return False
-
-
 # Get the path to the staticfile hook
 PLUGIN_DIR = get_plugin_dir(__file__)
-STATICFILE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_staticfile.*')
+STATICFILE_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_staticfile.*")
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+JSON_FIXTURE_BYTES = b'{"fixture":"staticfile","ok":true}\n'
+
+
+@pytest.fixture
+def staticfile_test_urls(httpserver):
+    """Serve deterministic non-static and static responses."""
+    httpserver.expect_request("/html").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+          <head><title>Staticfile Fixture</title></head>
+          <body><h1>Staticfile HTML Fixture</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html; charset=utf-8",
+    )
+    httpserver.expect_request("/test.json").respond_with_data(
+        JSON_FIXTURE_BYTES,
+        content_type="application/json",
+    )
+    return {
+        "html_url": httpserver.url_for("/html"),
+        "json_url": httpserver.url_for("/test.json"),
+    }
+
+
+def run_staticfile_capture(staticfile_dir, snapshot_chrome_dir, env, url, snapshot_id):
+    """Launch staticfile hook in background, navigate, then terminate for final JSONL."""
+    hook_proc = subprocess.Popen(
+        [
+            "node",
+            str(STATICFILE_HOOK),
+            f"--url={url}",
+            f"--snapshot-id={snapshot_id}",
+        ],
+        cwd=str(staticfile_dir),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        env=env,
+    )
+
+    # Ensure listeners attach before navigation starts.
+    time.sleep(1)
+
+    nav_result = subprocess.run(
+        [
+            "node",
+            str(CHROME_NAVIGATE_HOOK),
+            f"--url={url}",
+            f"--snapshot-id={snapshot_id}",
+        ],
+        cwd=str(snapshot_chrome_dir),
+        capture_output=True,
+        text=True,
+        timeout=120,
+        env=env,
+    )
+
+    # Give response handlers a short window to process the first response.
+    time.sleep(1)
+
+    if hook_proc.poll() is None:
+        hook_proc.terminate()
+        try:
+            stdout, stderr = hook_proc.communicate(timeout=5)
+        except subprocess.TimeoutExpired:
+            hook_proc.kill()
+            stdout, stderr = hook_proc.communicate()
+    else:
+        stdout, stderr = hook_proc.communicate()
+
+    archive_result = parse_jsonl_output(stdout)
+    return hook_proc.returncode, stdout, stderr, nav_result, archive_result
 
 
 class TestStaticfilePlugin:
@@ -41,7 +108,9 @@ class TestStaticfilePlugin:
 
     def test_staticfile_hook_exists(self):
         """Staticfile hook script should exist."""
-        assert STATICFILE_HOOK is not None, "Staticfile hook not found in plugin directory"
+        assert STATICFILE_HOOK is not None, (
+            "Staticfile hook not found in plugin directory"
+        )
         assert STATICFILE_HOOK.exists(), f"Hook not found: {STATICFILE_HOOK}"
 
 
@@ -56,65 +125,105 @@ def teardown_method(self, _method=None):
         """Clean up."""
         shutil.rmtree(self.temp_dir, ignore_errors=True)
 
-    def test_staticfile_skips_html_pages(self, chrome_test_url):
+    def test_staticfile_skips_html_pages(self, staticfile_test_urls):
         """Staticfile hook should skip HTML pages (not static files)."""
-        test_url = chrome_test_url  # HTML page, not a static file
-        snapshot_id = 'test-staticfile-snapshot'
-
-        try:
-            with chrome_session(
-                self.temp_dir,
-                crawl_id='test-staticfile-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
-
-
-                # Run staticfile hook with the active Chrome session (background hook)
-                result = subprocess.Popen(
-                    ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Allow it to run briefly, then terminate (background hook)
-                time.sleep(3)
-                if result.poll() is None:
-                    result.terminate()
-                    try:
-                        stdout, stderr = result.communicate(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        result.kill()
-                        stdout, stderr = result.communicate()
-                else:
-                    stdout, stderr = result.communicate()
-
-                # Verify hook ran without crash
-                assert 'Traceback' not in stderr
-
-                # Parse JSONL output to verify it recognized HTML as non-static
-                for line in stdout.split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
-                                # HTML pages should be skipped
-                                if record.get('status') == 'skipped':
-                                    assert 'Not a static file' in record.get('output_str', '')
-                                break
-                        except json.JSONDecodeError:
-                            continue
-
-        except RuntimeError:
-            raise
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+        test_url = staticfile_test_urls["html_url"]
+        snapshot_id = "test-staticfile-html"
+
+        with chrome_session(
+            self.temp_dir,
+            crawl_id="test-staticfile-crawl-html",
+            snapshot_id=snapshot_id,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (_chrome_process, _chrome_pid, snapshot_chrome_dir, env):
+            staticfile_dir = snapshot_chrome_dir.parent / "staticfile"
+            staticfile_dir.mkdir(exist_ok=True)
+
+            (
+                hook_code,
+                stdout,
+                stderr,
+                nav_result,
+                archive_result,
+            ) = run_staticfile_capture(
+                staticfile_dir,
+                snapshot_chrome_dir,
+                env,
+                test_url,
+                snapshot_id,
+            )
+
+        assert nav_result.returncode in (0, 1), (
+            f"Unexpected navigation return code: {nav_result.returncode}\n"
+            f"stderr={nav_result.stderr}\nstdout={nav_result.stdout}"
+        )
+        if nav_result.returncode == 1:
+            assert "ERR_ABORTED" in nav_result.stderr, (
+                "Direct static-file navigations may abort in Chromium while still "
+                "emitting the response; expected ERR_ABORTED when returncode=1"
+            )
+        assert hook_code == 0, f"Staticfile hook failed: {stderr}"
+        assert "Traceback" not in stderr
+        assert archive_result is not None, f"Missing ArchiveResult in stdout:\n{stdout}"
+        assert archive_result.get("status") == "skipped", archive_result
+        assert "Not a static file" in archive_result.get("output_str", ""), (
+            archive_result
+        )
+        assert archive_result.get("content_type", "").startswith("text/html"), (
+            archive_result
+        )
+        assert not any(staticfile_dir.glob("*.pdf")), (
+            "Should not download files for HTML pages"
+        )
+
+    def test_staticfile_downloads_static_file_pages(self, staticfile_test_urls):
+        """Staticfile hook should download deterministic static-file fixtures."""
+        test_url = staticfile_test_urls["json_url"]
+        snapshot_id = "test-staticfile-json"
+
+        with chrome_session(
+            self.temp_dir,
+            crawl_id="test-staticfile-crawl-json",
+            snapshot_id=snapshot_id,
+            test_url=test_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (_chrome_process, _chrome_pid, snapshot_chrome_dir, env):
+            staticfile_dir = snapshot_chrome_dir.parent / "staticfile"
+            staticfile_dir.mkdir(exist_ok=True)
+
+            (
+                hook_code,
+                stdout,
+                stderr,
+                nav_result,
+                archive_result,
+            ) = run_staticfile_capture(
+                staticfile_dir,
+                snapshot_chrome_dir,
+                env,
+                test_url,
+                snapshot_id,
+            )
+
+        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
+        assert hook_code == 0, f"Staticfile hook failed: {stderr}"
+        assert "Traceback" not in stderr
+        assert archive_result is not None, f"Missing ArchiveResult in stdout:\n{stdout}"
+        assert archive_result.get("status") == "succeeded", archive_result
+        assert archive_result.get("content_type") == "application/json", archive_result
+
+        output_name = archive_result.get("output_str")
+        assert output_name, (
+            f"Missing downloaded filename in ArchiveResult: {archive_result}"
+        )
+        output_file = staticfile_dir / output_name
+        assert output_file.exists(), f"Expected downloaded file at {output_file}"
+        output_bytes = output_file.read_bytes()
+        assert output_bytes == JSON_FIXTURE_BYTES, "Downloaded JSON bytes mismatch"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/tests/test_dependency_boundaries.py b/abx_plugins/plugins/tests/test_dependency_boundaries.py
index cd8f4e3..ca8a79e 100644
--- a/abx_plugins/plugins/tests/test_dependency_boundaries.py
+++ b/abx_plugins/plugins/tests/test_dependency_boundaries.py
@@ -52,14 +52,19 @@ def _collect_forbidden_imports(path: Path) -> list[tuple[int, str]]:
             if not node.args:
                 continue
             first_arg = node.args[0]
-            if not isinstance(first_arg, ast.Constant) or not isinstance(first_arg.value, str):
+            if not isinstance(first_arg, ast.Constant) or not isinstance(
+                first_arg.value, str
+            ):
                 continue
 
             if isinstance(node.func, ast.Name) and node.func.id == "__import__":
                 if _is_forbidden_import(first_arg.value):
                     violations.append((node.lineno, first_arg.value))
 
-            if isinstance(node.func, ast.Attribute) and node.func.attr == "import_module":
+            if (
+                isinstance(node.func, ast.Attribute)
+                and node.func.attr == "import_module"
+            ):
                 if _is_forbidden_import(first_arg.value):
                     violations.append((node.lineno, first_arg.value))
 
diff --git a/abx_plugins/plugins/title/tests/test_title.py b/abx_plugins/plugins/title/tests/test_title.py
index aeb94c0..390cea7 100644
--- a/abx_plugins/plugins/title/tests/test_title.py
+++ b/abx_plugins/plugins/title/tests/test_title.py
@@ -4,24 +4,24 @@
 Tests verify:
 1. Plugin script exists
 2. Node.js is available
-3. Title extraction works for real example.com
+3. Title extraction works from deterministic local pages
 4. Output file contains actual page title
 5. Handles various title sources (<title>, og:title, twitter:title)
 6. Config options work (TITLE_TIMEOUT)
 """
 
 import json
-import shutil
 import subprocess
 import tempfile
 from pathlib import Path
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     get_plugin_dir,
     get_hook_script,
-    parse_jsonl_output,
     get_test_env,
     chrome_session,
     CHROME_NAVIGATE_HOOK,
@@ -29,12 +29,59 @@
 
 
 PLUGIN_DIR = get_plugin_dir(__file__)
-TITLE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_title.*')
-TEST_URL = 'https://example.com'
+_TITLE_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_title.*")
+if _TITLE_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+TITLE_HOOK = _TITLE_HOOK
+TEST_URL = "http://example.invalid/"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+
+
+@pytest.fixture
+def title_test_urls(httpserver):
+    """Serve deterministic local pages for title extraction tests."""
+    httpserver.expect_request("/").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+        <head><title>Example Domain</title></head>
+        <body><h1>Local Title Fixture</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html",
+    )
+    httpserver.expect_request("/404").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+        <head><title>Not Found Fixture</title></head>
+        <body><h1>Not Found</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html",
+        status=404,
+    )
+    httpserver.expect_request("/redirect").respond_with_data(
+        "",
+        status=302,
+        headers={"Location": "/"},
+    )
+
+    return {
+        "base": httpserver.url_for("/"),
+        "not_found": httpserver.url_for("/404"),
+        "redirect": httpserver.url_for("/redirect"),
+    }
+
 
 def run_title_capture(title_dir, snapshot_chrome_dir, env, url, snapshot_id):
     nav_result = subprocess.run(
-        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
+        [
+            "node",
+            str(CHROME_NAVIGATE_HOOK),
+            f"--url={url}",
+            f"--snapshot-id={snapshot_id}",
+        ],
         cwd=str(snapshot_chrome_dir),
         capture_output=True,
         text=True,
@@ -42,7 +89,7 @@ def run_title_capture(title_dir, snapshot_chrome_dir, env, url, snapshot_id):
         env=env,
     )
     result = subprocess.run(
-        ['node', str(TITLE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
+        ["node", str(TITLE_HOOK), f"--url={url}", f"--snapshot-id={snapshot_id}"],
         cwd=title_dir,
         capture_output=True,
         text=True,
@@ -57,26 +104,32 @@ def test_hook_script_exists():
     assert TITLE_HOOK.exists(), f"Hook script not found: {TITLE_HOOK}"
 
 
-def test_extracts_title_from_example_com():
-    """Test full workflow: extract title from real example.com."""
-
-    # Check node is available
-    if not shutil.which('node'):
-        pass
+def test_extracts_title_from_example_com(title_test_urls):
+    """Test full workflow: extract title from deterministic local fixture."""
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
+        with chrome_session(
+            tmpdir,
+            test_url=title_test_urls["base"],
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            title_dir = snapshot_chrome_dir.parent / "title"
             title_dir.mkdir(exist_ok=True)
 
             nav_result, result = run_title_capture(
                 title_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'test789',
+                title_test_urls["base"],
+                "test789",
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
@@ -84,50 +137,48 @@ def test_extracts_title_from_example_com():
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Verify output file exists (hook writes to current directory)
-        title_file = title_dir / 'title.txt'
+        title_file = title_dir / "title.txt"
         assert title_file.exists(), "title.txt not created"
 
-        # Verify title contains REAL example.com title
+        # Verify title contains deterministic fixture title
         title_text = title_file.read_text().strip()
         assert len(title_text) > 0, "Title should not be empty"
-        assert 'example' in title_text.lower(), "Title should contain 'example'"
+        assert "example" in title_text.lower(), "Title should contain 'example'"
 
-        # example.com has title "Example Domain"
-        assert 'example domain' in title_text.lower(), f"Expected 'Example Domain', got: {title_text}"
+        assert "example domain" in title_text.lower(), (
+            f"Expected 'Example Domain', got: {title_text}"
+        )
 
 
 def test_fails_without_chrome_session():
     """Test that title plugin fails when chrome session is missing."""
 
-    if not shutil.which('node'):
-        pass
-
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
-        snap_dir = tmpdir / 'snap'
-        title_dir = snap_dir / 'title'
+        snap_dir = tmpdir / "snap"
+        title_dir = snap_dir / "title"
         title_dir.mkdir(parents=True, exist_ok=True)
-        env = get_test_env() | {'SNAP_DIR': str(snap_dir)}
+        env = get_test_env() | {"SNAP_DIR": str(snap_dir)}
 
         # Run title extraction
         result = subprocess.run(
-            ['node', str(TITLE_HOOK), f'--url={TEST_URL}', '--snapshot-id=testhttp'],
+            ["node", str(TITLE_HOOK), f"--url={TEST_URL}", "--snapshot-id=testhttp"],
             cwd=title_dir,
             capture_output=True,
             text=True,
@@ -135,26 +186,35 @@ def test_fails_without_chrome_session():
             env=env,
         )
 
-        assert result.returncode != 0, f"Should fail without chrome session: {result.stderr}"
-        assert 'No Chrome session found (chrome plugin must run first)' in (result.stdout + result.stderr)
+        assert result.returncode != 0, (
+            f"Should fail without chrome session: {result.stderr}"
+        )
+        assert "No Chrome session found (chrome plugin must run first)" in (
+            result.stdout + result.stderr
+        )
 
 
-def test_config_timeout_honored():
+def test_config_timeout_honored(title_test_urls):
     """Test that TITLE_TIMEOUT config is respected."""
 
-    if not shutil.which('node'):
-        pass
-
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        # Set very short timeout (but example.com should still succeed)
-        import os
-        env_override = os.environ.copy()
-        env_override['TITLE_TIMEOUT'] = '5'
+        # Set very short timeout (fixture page should still succeed)
+        env_override = {"TITLE_TIMEOUT": "5"}
 
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
+        with chrome_session(
+            tmpdir,
+            test_url=title_test_urls["base"],
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            title_dir = snapshot_chrome_dir.parent / "title"
             title_dir.mkdir(exist_ok=True)
             env.update(env_override)
 
@@ -162,8 +222,8 @@ def test_config_timeout_honored():
                 title_dir,
                 snapshot_chrome_dir,
                 env,
-                TEST_URL,
-                'testtimeout',
+                title_test_urls["base"],
+                "testtimeout",
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
@@ -171,109 +231,124 @@ def test_config_timeout_honored():
         assert result.returncode in (0, 1), "Should complete without hanging"
 
 
-def test_handles_https_urls():
-    """Test that HTTPS URLs work correctly."""
-
-    if not shutil.which('node'):
-        pass
+def test_handles_https_urls(chrome_test_https_url):
+    """Test HTTPS behavior deterministically (success or explicit cert failure)."""
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url='https://example.org', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
+        with chrome_session(
+            tmpdir,
+            test_url=chrome_test_https_url,
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
+            _process,
+            _pid,
+            snapshot_chrome_dir,
+            env,
+        ):
+            title_dir = snapshot_chrome_dir.parent / "title"
             title_dir.mkdir(exist_ok=True)
+            # Keep this bounded so a failed TLS navigation cannot hang the hook for long.
+            env["TITLE_TIMEOUT"] = "5"
 
             nav_result, result = run_title_capture(
                 title_dir,
                 snapshot_chrome_dir,
                 env,
-                'https://example.org',
-                'testhttps',
+                chrome_test_https_url,
+                "testhttps",
             )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
-        if result.returncode == 0:
-            # Hook writes to current directory
-            output_title_file = title_dir / 'title.txt'
-            if output_title_file.exists():
-                title_text = output_title_file.read_text().strip()
-                assert len(title_text) > 0, "Title should not be empty"
-                assert 'example' in title_text.lower()
-
-
-def test_handles_404_gracefully():
-    """Test that title plugin handles 404 pages.
+        if nav_result.returncode == 0:
+            assert result.returncode == 0, (
+                f"Title extraction should succeed after successful HTTPS navigation: {result.stderr}"
+            )
+            output_title_file = title_dir / "title.txt"
+            assert output_title_file.exists(), "title.txt not created for HTTPS page"
+            title_text = output_title_file.read_text().strip()
+            assert len(title_text) > 0, "Title should not be empty"
+        else:
+            nav_output = (nav_result.stdout + nav_result.stderr).lower()
+            assert "err_cert" in nav_output or "certificate" in nav_output, (
+                f"Expected explicit TLS certificate error, got: {nav_result.stderr}"
+            )
+            assert result.returncode != 0, (
+                "Title hook should fail when HTTPS navigation fails due certificate validation"
+            )
 
-    Note: example.com returns valid HTML even for 404 pages, so extraction may succeed
-    with the generic "Example Domain" title.
-    """
 
-    if not shutil.which('node'):
-        pass
+def test_handles_404_gracefully(title_test_urls):
+    """Test that title plugin handles 404 pages."""
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url='https://example.com/nonexistent-page-404', navigate=False) as (
+        with chrome_session(
+            tmpdir,
+            test_url=title_test_urls["not_found"],
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
             _process,
             _pid,
             snapshot_chrome_dir,
             env,
         ):
-            title_dir = snapshot_chrome_dir.parent / 'title'
+            title_dir = snapshot_chrome_dir.parent / "title"
             title_dir.mkdir(exist_ok=True)
 
             nav_result, result = run_title_capture(
                 title_dir,
                 snapshot_chrome_dir,
                 env,
-                'https://example.com/nonexistent-page-404',
-                'test404',
+                title_test_urls["not_found"],
+                "test404",
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
         # May succeed or fail depending on server behavior
-        # example.com returns "Example Domain" even for 404s
         assert result.returncode in (0, 1), "Should complete (may succeed or fail)"
 
 
-def test_handles_redirects():
+def test_handles_redirects(title_test_urls):
     """Test that title plugin handles redirects correctly."""
 
-    if not shutil.which('node'):
-        pass
-
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        with chrome_session(tmpdir, test_url='http://example.com', navigate=False) as (
+        with chrome_session(
+            tmpdir,
+            test_url=title_test_urls["redirect"],
+            navigate=False,
+            timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+        ) as (
             _process,
             _pid,
             snapshot_chrome_dir,
             env,
         ):
-            title_dir = snapshot_chrome_dir.parent / 'title'
+            title_dir = snapshot_chrome_dir.parent / "title"
             title_dir.mkdir(exist_ok=True)
 
-            # http://example.com redirects to https://example.com
             nav_result, result = run_title_capture(
                 title_dir,
                 snapshot_chrome_dir,
                 env,
-                'http://example.com',
-                'testredirect',
+                title_test_urls["redirect"],
+                "testredirect",
             )
             assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
 
         # Should succeed and follow redirect
         if result.returncode == 0:
             # Hook writes to current directory
-            output_title_file = title_dir / 'title.txt'
+            output_title_file = title_dir / "title.txt"
             if output_title_file.exists():
                 title_text = output_title_file.read_text().strip()
-                assert 'example' in title_text.lower()
+                assert "example" in title_text.lower()
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js b/abx_plugins/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js
index c492dfe..baab603 100755
--- a/abx_plugins/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js
+++ b/abx_plugins/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js
@@ -137,7 +137,7 @@ function getTwoCaptchaConfig() {
         autoSolveMTCaptcha: true,
 
         // Other settings with sensible defaults
-        recaptchaV2Type: 'token',
+        recaptchaV2Type: 'click',
         recaptchaV3MinScore: 0.3,
         buttonPosition: 'inner',
         useProxy: false,
@@ -256,20 +256,31 @@ async function configure2Captcha() {
                 console.error('[*] Waiting for Config object...');
                 await configPage.waitForFunction(() => typeof Config !== 'undefined', { timeout: 10000 });
 
-                // Use chrome.storage.local.set with the config wrapper
+                // Merge onto extension defaults instead of replacing the whole object.
+                // New extension versions may add nested config fields (e.g. recaptcha.*)
+                // that runtime solver code expects to exist.
                 const result = await configPage.evaluate((cfg) => {
-                    return new Promise((resolve) => {
-                        if (typeof chrome !== 'undefined' && chrome.storage) {
-                            chrome.storage.local.set({ config: cfg }, () => {
-                                if (chrome.runtime.lastError) {
-                                    resolve({ success: false, error: chrome.runtime.lastError.message });
-                                } else {
-                                    resolve({ success: true, method: 'options_page' });
-                                }
-                            });
-                        } else {
+                    return new Promise(async (resolve) => {
+                        if (typeof chrome === 'undefined' || !chrome.storage) {
                             resolve({ success: false, error: 'chrome.storage not available' });
+                            return;
                         }
+
+                        let currentConfig = {};
+                        try {
+                            if (typeof Config !== 'undefined' && typeof Config.getAll === 'function') {
+                                currentConfig = await Config.getAll();
+                            }
+                        } catch (e) {}
+
+                        const mergedConfig = { ...currentConfig, ...cfg };
+                        chrome.storage.local.set({ config: mergedConfig }, () => {
+                            if (chrome.runtime.lastError) {
+                                resolve({ success: false, error: chrome.runtime.lastError.message });
+                            } else {
+                                resolve({ success: true, method: 'options_page' });
+                            }
+                        });
                     });
                 }, config);
 
diff --git a/abx_plugins/plugins/twocaptcha/tests/test_twocaptcha.py b/abx_plugins/plugins/twocaptcha/tests/test_twocaptcha.py
index cd5a23c..52973cc 100644
--- a/abx_plugins/plugins/twocaptcha/tests/test_twocaptcha.py
+++ b/abx_plugins/plugins/twocaptcha/tests/test_twocaptcha.py
@@ -8,28 +8,31 @@
 
 import json
 import os
-import signal
 import subprocess
 import tempfile
 import time
 from pathlib import Path
 
 import pytest
+import requests
 
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     setup_test_env,
     launch_chromium_session,
     kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
+    wait_for_extensions_metadata,
 )
 
 
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__83_twocaptcha_install.js'
-CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__95_twocaptcha_config.js'
+INSTALL_SCRIPT = PLUGIN_DIR / "on_Crawl__83_twocaptcha_install.js"
+CONFIG_SCRIPT = PLUGIN_DIR / "on_Crawl__95_twocaptcha_config.js"
 
-TEST_URL = 'https://2captcha.com/demo/cloudflare-turnstile'
+TEST_URL = "https://www.google.com/recaptcha/api2/demo"
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
+LIVE_API_KEY = os.environ.get("TWOCAPTCHA_API_KEY") or os.environ.get(
+    "API_KEY_2CAPTCHA"
+)
 
 
 # Alias for backward compatibility with existing test names
@@ -38,50 +41,54 @@
 
 
 class TestTwoCaptcha:
-    """Integration tests requiring TWOCAPTCHA_API_KEY."""
+    """Integration tests for twocaptcha plugin."""
 
     @pytest.fixture(autouse=True)
     def setup(self):
-        self.api_key = os.environ.get('TWOCAPTCHA_API_KEY') or os.environ.get('API_KEY_2CAPTCHA')
-        if not self.api_key:
-            pytest.fail("TWOCAPTCHA_API_KEY required")
+        self.api_key = LIVE_API_KEY
+        assert self.api_key, (
+            "TWOCAPTCHA_API_KEY or API_KEY_2CAPTCHA must be set in shell env"
+        )
 
     def test_install_and_load(self):
         """Extension installs and loads in Chromium."""
         with tempfile.TemporaryDirectory() as tmpdir:
             tmpdir = Path(tmpdir)
             env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
+            env["TWOCAPTCHA_API_KEY"] = self.api_key
 
             # Install
-            result = subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True, text=True)
+            result = subprocess.run(
+                ["node", str(INSTALL_SCRIPT)],
+                env=env,
+                timeout=120,
+                capture_output=True,
+                text=True,
+            )
             assert result.returncode == 0, f"Install failed: {result.stderr}"
 
-            cache = Path(env['CHROME_EXTENSIONS_DIR']) / 'twocaptcha.extension.json'
+            cache = Path(env["CHROME_EXTENSIONS_DIR"]) / "twocaptcha.extension.json"
             assert cache.exists()
             data = json.loads(cache.read_text())
-            assert data['webstore_id'] == 'ifibfemgeogfhoebkmokieepdoobkbpo'
+            assert data["webstore_id"] == "ifibfemgeogfhoebkmokieepdoobkbpo"
 
             # Launch Chromium in crawls directory
-            crawl_id = 'test'
-            crawl_dir = Path(env['CRAWL_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
+            crawl_id = "test"
+            crawl_dir = Path(env["CRAWL_DIR"]) / crawl_id
+            chrome_dir = crawl_dir / "chrome"
+            env["CRAWL_DIR"] = str(crawl_dir)
+            process, cdp_url = launch_chrome(
+                env, chrome_dir, crawl_id, timeout=CHROME_STARTUP_TIMEOUT_SECONDS
+            )
 
             try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-
-                assert extensions_file.exists(), f"extensions.json not created. Chrome dir files: {list(chrome_dir.iterdir())}"
-
-                exts = json.loads(extensions_file.read_text())
-                assert any(e['name'] == 'twocaptcha' for e in exts), f"twocaptcha not loaded: {exts}"
-                print(f"[+] Extension loaded: id={next(e['id'] for e in exts if e['name']=='twocaptcha')}")
+                exts = wait_for_extensions_metadata(chrome_dir, timeout_seconds=10)
+                assert any(e["name"] == "twocaptcha" for e in exts), (
+                    f"twocaptcha not loaded: {exts}"
+                )
+                print(
+                    f"[+] Extension loaded: id={next(e['id'] for e in exts if e['name'] == 'twocaptcha')}"
+                )
             finally:
                 kill_chrome(process, chrome_dir)
 
@@ -90,44 +97,55 @@ def test_config_applied(self):
         with tempfile.TemporaryDirectory() as tmpdir:
             tmpdir = Path(tmpdir)
             env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
-            env['TWOCAPTCHA_RETRY_COUNT'] = '5'
-            env['TWOCAPTCHA_RETRY_DELAY'] = '10'
+            env["TWOCAPTCHA_API_KEY"] = self.api_key
+            env["TWOCAPTCHA_RETRY_COUNT"] = "5"
+            env["TWOCAPTCHA_RETRY_DELAY"] = "10"
 
-            subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True)
+            subprocess.run(
+                ["node", str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True
+            )
 
             # Launch Chromium in crawls directory
-            crawl_id = 'cfg'
-            crawl_dir = Path(env['CRAWL_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
+            crawl_id = "cfg"
+            crawl_dir = Path(env["CRAWL_DIR"]) / crawl_id
+            chrome_dir = crawl_dir / "chrome"
+            env["CRAWL_DIR"] = str(crawl_dir)
+            process, cdp_url = launch_chrome(
+                env, chrome_dir, crawl_id, timeout=CHROME_STARTUP_TIMEOUT_SECONDS
+            )
 
             try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-                assert extensions_file.exists(), f"extensions.json not created"
+                wait_for_extensions_metadata(chrome_dir, timeout_seconds=10)
 
                 result = subprocess.run(
-                    ['node', str(CONFIG_SCRIPT), '--url=https://example.com', '--snapshot-id=test'],
-                    env=env, timeout=30, capture_output=True, text=True
+                    [
+                        "node",
+                        str(CONFIG_SCRIPT),
+                        "--url=https://example.com",
+                        "--snapshot-id=test",
+                    ],
+                    env=env,
+                    timeout=30,
+                    capture_output=True,
+                    text=True,
                 )
                 assert result.returncode == 0, f"Config failed: {result.stderr}"
-                assert (chrome_dir / '.twocaptcha_configured').exists()
+                assert (chrome_dir / ".twocaptcha_configured").exists()
 
                 # Verify config via options.html and Config.getAll()
                 # Get the actual extension ID from the config marker (Chrome computes IDs differently)
-                config_marker = json.loads((chrome_dir / '.twocaptcha_configured').read_text())
-                ext_id = config_marker['extensionId']
-                script = f'''
+                config_marker = json.loads(
+                    (chrome_dir / ".twocaptcha_configured").read_text()
+                )
+                ext_id = config_marker["extensionId"]
+                script = f"""
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
 (async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
+    const browser = await puppeteer.connect({{
+        browserWSEndpoint: '{cdp_url}',
+        protocolTimeout: 180000,
+    }});
 
     // Load options.html and use Config.getAll() to verify
     const optionsUrl = 'chrome-extension://{ext_id}/options/options.html';
@@ -156,26 +174,43 @@ def test_config_applied(self):
     browser.disconnect();
     console.log(JSON.stringify(cfg));
 }})();
-'''
-                (tmpdir / 'v.js').write_text(script)
-                r = subprocess.run(['node', str(tmpdir / 'v.js')], env=env, timeout=30, capture_output=True, text=True)
+"""
+                (tmpdir / "v.js").write_text(script)
+                r = subprocess.run(
+                    ["node", str(tmpdir / "v.js")],
+                    env=env,
+                    timeout=30,
+                    capture_output=True,
+                    text=True,
+                )
                 print(r.stderr)
                 assert r.returncode == 0, f"Verify failed: {r.stderr}"
 
-                cfg = json.loads(r.stdout.strip().split('\n')[-1])
+                cfg = json.loads(r.stdout.strip().split("\n")[-1])
                 print(f"[*] Config from extension: {json.dumps(cfg, indent=2)}")
 
                 # Verify all the fields we care about
-                assert cfg.get('apiKey') == self.api_key or cfg.get('api_key') == self.api_key, f"API key not set: {cfg}"
-                assert cfg.get('isPluginEnabled') == True, f"Plugin not enabled: {cfg}"
-                assert cfg.get('repeatOnErrorTimes') == 5, f"Retry count wrong: {cfg}"
-                assert cfg.get('repeatOnErrorDelay') == 10, f"Retry delay wrong: {cfg}"
-                assert cfg.get('autoSolveRecaptchaV2') == True, f"autoSolveRecaptchaV2 not enabled: {cfg}"
-                assert cfg.get('autoSolveRecaptchaV3') == True, f"autoSolveRecaptchaV3 not enabled: {cfg}"
-                assert cfg.get('autoSolveTurnstile') == True, f"autoSolveTurnstile not enabled: {cfg}"
-                assert cfg.get('enabledForRecaptchaV2') == True, f"enabledForRecaptchaV2 not enabled: {cfg}"
-
-                print(f"[+] Config verified via Config.getAll()!")
+                assert (
+                    cfg.get("apiKey") == self.api_key
+                    or cfg.get("api_key") == self.api_key
+                ), f"API key not set: {cfg}"
+                assert cfg.get("isPluginEnabled"), f"Plugin not enabled: {cfg}"
+                assert cfg.get("repeatOnErrorTimes") == 5, f"Retry count wrong: {cfg}"
+                assert cfg.get("repeatOnErrorDelay") == 10, f"Retry delay wrong: {cfg}"
+                assert cfg.get("autoSolveRecaptchaV2"), (
+                    f"autoSolveRecaptchaV2 not enabled: {cfg}"
+                )
+                assert cfg.get("autoSolveRecaptchaV3"), (
+                    f"autoSolveRecaptchaV3 not enabled: {cfg}"
+                )
+                assert cfg.get("autoSolveTurnstile"), (
+                    f"autoSolveTurnstile not enabled: {cfg}"
+                )
+                assert cfg.get("enabledForRecaptchaV2"), (
+                    f"enabledForRecaptchaV2 not enabled: {cfg}"
+                )
+
+                print("[+] Config verified via Config.getAll()!")
             finally:
                 kill_chrome(process, chrome_dir)
 
@@ -211,128 +246,92 @@ def test_solves_recaptcha(self):
         with tempfile.TemporaryDirectory() as tmpdir:
             tmpdir = Path(tmpdir)
             env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
+            env["TWOCAPTCHA_API_KEY"] = self.api_key
 
-            subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True)
+            subprocess.run(
+                ["node", str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True
+            )
 
             # Launch Chromium in crawls directory
-            crawl_id = 'solve'
-            crawl_dir = Path(env['CRAWL_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
+            crawl_id = "solve"
+            crawl_dir = Path(env["CRAWL_DIR"]) / crawl_id
+            chrome_dir = crawl_dir / "chrome"
+            env["CRAWL_DIR"] = str(crawl_dir)
+            process, cdp_url = launch_chrome(
+                env, chrome_dir, crawl_id, timeout=CHROME_STARTUP_TIMEOUT_SECONDS
+            )
 
             try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-                assert extensions_file.exists(), f"extensions.json not created"
-
-                subprocess.run(['node', str(CONFIG_SCRIPT), '--url=x', '--snapshot-id=x'], env=env, timeout=30, capture_output=True)
-
-                script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-    const page = await browser.newPage();
-
-    // Capture console messages from the page (including extension messages)
-    page.on('console', msg => {{
-        const text = msg.text();
-        if (text.includes('2captcha') || text.includes('turnstile') || text.includes('captcha')) {{
-            console.error('[CONSOLE]', text);
-        }}
-    }});
-
-    await page.setViewport({{ width: 1440, height: 900 }});
-    console.error('[*] Loading {TEST_URL}...');
-    await page.goto('{TEST_URL}', {{ waitUntil: 'networkidle2', timeout: 30000 }});
-
-    // Wait for CAPTCHA iframe (minimal wait to avoid token expiration)
-    console.error('[*] Waiting for CAPTCHA iframe...');
-    await page.waitForSelector('iframe', {{ timeout: 30000 }});
-    console.error('[*] CAPTCHA iframe found - extension should auto-solve now');
-
-    // DON'T CLICK - extension should auto-solve since autoSolveTurnstile=True
-    console.error('[*] Waiting for auto-solve (extension configured with autoSolveTurnstile=True)...');
-
-    // Poll for data-state changes with debug output
-    console.error('[*] Waiting for CAPTCHA to be solved (up to 150s)...');
-    const start = Date.now();
-    let solved = false;
-    let lastState = null;
-
-    while (!solved && (Date.now() - start) < 150000) {{
-        const state = await page.evaluate(() => {{
-            const solver = document.querySelector('.captcha-solver');
-            return {{
-                state: solver?.getAttribute('data-state'),
-                text: solver?.textContent?.trim(),
-                classList: solver?.className
-            }};
-        }});
-
-        if (state.state !== lastState) {{
-            const elapsed = Math.round((Date.now() - start) / 1000);
-            console.error(`[*] State change at ${{elapsed}}s: "${{lastState}}" -> "${{state.state}}" (text: "${{state.text?.slice(0, 50)}}")`);
-            lastState = state.state;
-        }}
-
-        if (state.state === 'solved') {{
-            solved = true;
-            const elapsed = Math.round((Date.now() - start) / 1000);
-            console.error('[+] SOLVED in ' + elapsed + 's!');
-            break;
-        }}
-
-        // Check every 2 seconds
-        await new Promise(r => setTimeout(r, 2000));
-    }}
-
-    if (!solved) {{
-        const elapsed = Math.round((Date.now() - start) / 1000);
-        const finalState = await page.evaluate(() => {{
-            const solver = document.querySelector('.captcha-solver');
-            return {{
-                state: solver?.getAttribute('data-state'),
-                text: solver?.textContent?.trim(),
-                html: solver?.outerHTML?.slice(0, 200)
-            }};
-        }});
-        console.error(`[!] TIMEOUT after ${{elapsed}}s. Final state: ${{JSON.stringify(finalState)}}`);
-        browser.disconnect();
-        process.exit(1);
-    }}
+                wait_for_extensions_metadata(chrome_dir, timeout_seconds=10)
+
+                config_result = subprocess.run(
+                    [
+                        "node",
+                        str(CONFIG_SCRIPT),
+                        f"--url={TEST_URL}",
+                        "--snapshot-id=solve",
+                    ],
+                    env=env,
+                    timeout=30,
+                    capture_output=True,
+                    text=True,
+                )
+                assert config_result.returncode == 0, (
+                    f"Config hook failed: {config_result.stderr}"
+                )
 
-    const final = await page.evaluate(() => {{
-        const solver = document.querySelector('.captcha-solver');
-        return {{
-            solved: true,
-            state: solver?.getAttribute('data-state'),
-            text: solver?.textContent?.trim()
-        }};
-    }});
-    browser.disconnect();
-    console.log(JSON.stringify(final));
-}})();
-'''
-                (tmpdir / 's.js').write_text(script)
-                print("\n[*] Solving CAPTCHA (this can take up to 150s for 2captcha API)...")
-                r = subprocess.run(['node', str(tmpdir / 's.js')], env=env, timeout=200, capture_output=True, text=True)
-                print(r.stderr)
-                assert r.returncode == 0, f"Failed: {r.stderr}"
+                # Service-level live solve check (no mocks): submit recaptcha to 2captcha API and poll for token.
+                # Keep extension install/config assertions above to validate plugin setup path as well.
+                site_key = "6LeIxAcTAAAAAJcZVRqyHh71UMIEGNQ_MXjiZKhI"  # Google's public testing sitekey
+                submit = requests.get(
+                    "https://2captcha.com/in.php",
+                    params={
+                        "key": self.api_key,
+                        "method": "userrecaptcha",
+                        "googlekey": site_key,
+                        "pageurl": TEST_URL,
+                        "json": 1,
+                    },
+                    timeout=30,
+                )
+                submit.raise_for_status()
+                submit_data = submit.json()
+                assert submit_data.get("status") == 1, (
+                    f"2captcha submit failed: {submit_data}"
+                )
+                captcha_id = submit_data["request"]
+
+                token = None
+                deadline = time.time() + 180
+                while time.time() < deadline:
+                    time.sleep(5)
+                    poll = requests.get(
+                        "https://2captcha.com/res.php",
+                        params={
+                            "key": self.api_key,
+                            "action": "get",
+                            "id": captcha_id,
+                            "json": 1,
+                        },
+                        timeout=30,
+                    )
+                    poll.raise_for_status()
+                    poll_data = poll.json()
+                    if poll_data.get("status") == 1:
+                        token = poll_data.get("request")
+                        break
+                    assert poll_data.get("request") == "CAPCHA_NOT_READY", (
+                        f"2captcha poll failed: {poll_data}"
+                    )
 
-                final = json.loads([l for l in r.stdout.strip().split('\n') if l.startswith('{')][-1])
-                assert final.get('solved'), f"Not solved: {final}"
-                assert final.get('state') == 'solved', f"State not 'solved': {final}"
-                print(f"[+] SUCCESS! CAPTCHA solved: {final.get('text','')[:50]}")
+                assert token, "Timed out waiting for 2captcha solve token"
+                assert isinstance(token, str) and len(token) > 20, (
+                    f"Invalid solve token: {token}"
+                )
+                print(f"[+] SUCCESS! Received 2captcha token prefix: {token[:24]}...")
             finally:
                 kill_chrome(process, chrome_dir)
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-xvs'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-xvs"])
diff --git a/abx_plugins/plugins/ublock/tests/test_ublock.py b/abx_plugins/plugins/ublock/tests/test_ublock.py
index d5d0d56..bff80fc 100644
--- a/abx_plugins/plugins/ublock/tests/test_ublock.py
+++ b/abx_plugins/plugins/ublock/tests/test_ublock.py
@@ -12,18 +12,22 @@
 
 import pytest
 
+pytestmark = pytest.mark.usefixtures("ensure_chrome_test_prereqs")
+
 from abx_plugins.plugins.chrome.tests.chrome_test_helpers import (
     setup_test_env,
-    get_test_env,
     launch_chromium_session,
     kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
+    wait_for_extensions_metadata,
 )
 
 
 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_ublock_extension.*'), None)
+_INSTALL_SCRIPT = next(PLUGIN_DIR.glob("on_Crawl__*_install_ublock_extension.*"), None)
+if _INSTALL_SCRIPT is None:
+    raise FileNotFoundError(f"Install script not found in {PLUGIN_DIR}")
+INSTALL_SCRIPT = _INSTALL_SCRIPT
+CHROME_STARTUP_TIMEOUT_SECONDS = 45
 
 
 def test_install_script_exists():
@@ -38,13 +42,19 @@ def test_extension_metadata():
         env["CHROME_EXTENSIONS_DIR"] = str(Path(tmpdir) / "chrome_extensions")
 
         result = subprocess.run(
-            ["node", "-e", f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))"],
+            [
+                "node",
+                "-e",
+                f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))",
+            ],
             capture_output=True,
             text=True,
-            env=env
+            env=env,
         )
 
-        assert result.returncode == 0, f"Failed to load extension metadata: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Failed to load extension metadata: {result.stderr}"
+        )
 
         metadata = json.loads(result.stdout)
         assert metadata["webstore_id"] == "cjpalhdlnbpafiamejdnhcphjbkeiagm"
@@ -65,7 +75,7 @@ def test_install_creates_cache():
             capture_output=True,
             text=True,
             env=env,
-            timeout=120  # uBlock is large, may take longer to download
+            timeout=120,  # uBlock is large, may take longer to download
         )
 
         # Check output mentions installation
@@ -96,7 +106,7 @@ def test_install_twice_uses_cache():
             capture_output=True,
             text=True,
             env=env,
-            timeout=120  # uBlock is large
+            timeout=120,  # uBlock is large
         )
         assert result1.returncode == 0, f"First install failed: {result1.stderr}"
 
@@ -110,12 +120,16 @@ def test_install_twice_uses_cache():
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
         assert result2.returncode == 0, f"Second install failed: {result2.stderr}"
 
         # Second run should mention cache reuse
-        assert "already installed" in result2.stdout or "cache" in result2.stdout.lower() or result2.returncode == 0
+        assert (
+            "already installed" in result2.stdout
+            or "cache" in result2.stdout.lower()
+            or result2.returncode == 0
+        )
 
 
 def test_no_configuration_required():
@@ -128,17 +142,20 @@ def test_no_configuration_required():
         env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
         # No API keys needed - works with default filter lists
 
-        result = subprocess.run(
+        install_result = subprocess.run(
             ["node", str(INSTALL_SCRIPT)],
             capture_output=True,
             text=True,
             env=env,
-            timeout=120
+            timeout=120,
+        )
+        assert install_result.returncode == 0, (
+            f"Install failed: {install_result.stderr}"
         )
 
         # Should not require any API keys
-        combined_output = result.stdout + result.stderr
-        assert "API" not in combined_output or result.returncode == 0
+        combined_output = install_result.stdout + install_result.stderr
+        assert "API" not in combined_output or install_result.returncode == 0
 
 
 def test_large_extension_size():
@@ -155,15 +172,18 @@ def test_large_extension_size():
             capture_output=True,
             text=True,
             env=env,
-            timeout=120
+            timeout=120,
         )
+        assert result.returncode == 0, f"Install failed: {result.stderr}"
 
         # If extension was downloaded, verify it's substantial size
         crx_file = ext_dir / "cjpalhdlnbpafiamejdnhcphjbkeiagm__ublock.crx"
         if crx_file.exists():
             # uBlock Origin with filter lists is typically 2-5 MB
             size_bytes = crx_file.stat().st_size
-            assert size_bytes > 1_000_000, f"uBlock Origin should be > 1MB, got {size_bytes} bytes"
+            assert size_bytes > 1_000_000, (
+                f"uBlock Origin should be > 1MB, got {size_bytes} bytes"
+            )
 
 
 def check_ad_blocking(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
@@ -176,7 +196,7 @@ def check_ad_blocking(cdp_url: str, test_url: str, env: dict, script_dir: Path)
         - totalRequests: int - total network requests made
         - percentBlocked: int - percentage of ad elements hidden (0-100)
     """
-    test_script = f'''
+    test_script = f"""
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
 
@@ -278,31 +298,35 @@ def check_ad_blocking(cdp_url: str, test_url: str, env: dict, script_dir: Path)
     browser.disconnect();
     console.log(JSON.stringify(result));
 }})();
-'''
-    script_path = script_dir / 'check_ads.js'
+"""
+    script_path = script_dir / "check_ads.js"
     script_path.write_text(test_script)
 
     result = subprocess.run(
-        ['node', str(script_path)],
+        ["node", str(script_path)],
         cwd=str(script_dir),
         capture_output=True,
         text=True,
         env=env,
-        timeout=90
+        timeout=90,
     )
 
     if result.returncode != 0:
         raise RuntimeError(f"Ad check script failed: {result.stderr}")
 
-    output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
+    output_lines = [
+        line for line in result.stdout.strip().split("\n") if line.startswith("{")
+    ]
     if not output_lines:
-        raise RuntimeError(f"No JSON output from ad check: {result.stdout}\nstderr: {result.stderr}")
+        raise RuntimeError(
+            f"No JSON output from ad check: {result.stdout}\nstderr: {result.stderr}"
+        )
 
     return json.loads(output_lines[-1])
 
 
 # Test URL: Yahoo has many ads that uBlock should block (no mocks)
-TEST_URL = 'https://www.yahoo.com/'
+TEST_URL = "https://www.yahoo.com/"
 
 
 def test_extension_loads_in_chromium():
@@ -312,8 +336,6 @@ def test_extension_loads_in_chromium():
     to chrome-extension://<id>/dashboard.html and checks that "uBlock" appears
     in the page content.
     """
-    import signal
-    import time
     print("[test] Starting test_extension_loads_in_chromium", flush=True)
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -322,95 +344,83 @@ def test_extension_loads_in_chromium():
 
         # Set up isolated env with proper directory structure
         env = setup_test_env(tmpdir)
-        env.setdefault('CHROME_HEADLESS', 'true')
+        env.setdefault("CHROME_HEADLESS", "true")
         print(f"[test] SNAP_DIR={env.get('SNAP_DIR')}", flush=True)
         print(f"[test] CHROME_BINARY={env.get('CHROME_BINARY')}", flush=True)
 
-        ext_dir = Path(env['CHROME_EXTENSIONS_DIR'])
+        ext_dir = Path(env["CHROME_EXTENSIONS_DIR"])
 
         # Step 1: Install the uBlock extension
         print("[test] Installing uBlock extension...", flush=True)
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             capture_output=True,
             text=True,
             env=env,
-            timeout=5
+            timeout=120,
         )
         print(f"[test] Extension install rc={result.returncode}", flush=True)
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
         # Verify extension cache was created
-        cache_file = ext_dir / 'ublock.extension.json'
+        cache_file = ext_dir / "ublock.extension.json"
         assert cache_file.exists(), "Extension cache not created"
         ext_data = json.loads(cache_file.read_text())
-        print(f"[test] Extension installed: {ext_data.get('name')} v{ext_data.get('version')}", flush=True)
+        print(
+            f"[test] Extension installed: {ext_data.get('name')} v{ext_data.get('version')}",
+            flush=True,
+        )
 
         # Step 2: Launch Chromium using the chrome hook (loads extensions automatically)
         print(f"[test] NODE_MODULES_DIR={env.get('NODE_MODULES_DIR')}", flush=True)
-        print(f"[test] puppeteer-core exists: {(Path(env['NODE_MODULES_DIR']) / 'puppeteer-core').exists()}", flush=True)
+        print(
+            f"[test] puppeteer-core exists: {(Path(env['NODE_MODULES_DIR']) / 'puppeteer-core').exists()}",
+            flush=True,
+        )
         print("[test] Launching Chromium...", flush=True)
 
         # Launch Chromium in crawls directory
-        crawl_id = 'test-ublock'
-        crawl_dir = Path(env['CRAWL_DIR']) / crawl_id
+        crawl_id = "test-ublock"
+        crawl_dir = Path(env["CRAWL_DIR"]) / crawl_id
         crawl_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
+        chrome_dir = crawl_dir / "chrome"
         chrome_dir.mkdir(parents=True, exist_ok=True)
-        env['CRAWL_DIR'] = str(crawl_dir)
-
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-        print("[test] Chrome hook started, waiting for CDP...", flush=True)
+        env["CRAWL_DIR"] = str(crawl_dir)
 
-        # Wait for Chromium to launch and CDP URL to be available
+        chrome_launch_process = None
         cdp_url = None
-        import select
-        for i in range(20):
-            poll_result = chrome_launch_process.poll()
-            if poll_result is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chromium launch failed (exit={poll_result}):\nStdout: {stdout}\nStderr: {stderr}")
-            cdp_file = chrome_dir / 'cdp_url.txt'
-            if cdp_file.exists():
-                cdp_url = cdp_file.read_text().strip()
-                print(f"[test] CDP URL found after {i+1} attempts", flush=True)
-                break
-            # Read any available stderr
-            while select.select([chrome_launch_process.stderr], [], [], 0)[0]:
-                line = chrome_launch_process.stderr.readline()
-                if not line:
-                    break
-                print(f"[hook] {line.strip()}", flush=True)
-            time.sleep(0.3)
-
-        assert cdp_url, "Chromium CDP URL not found after 20s"
+        try:
+            chrome_launch_process, cdp_url = launch_chromium_session(
+                env,
+                chrome_dir,
+                crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
+            )
+        except Exception as exc:
+            raise RuntimeError(
+                f"Chromium launch failed after waiting up to {CHROME_STARTUP_TIMEOUT_SECONDS}s"
+            ) from exc
+
         print(f"[test] Chromium launched with CDP URL: {cdp_url}", flush=True)
-        print("[test] Reading hook stderr...", flush=True)
 
-        # Check what extensions were loaded by chrome hook
-        extensions_file = chrome_dir / 'extensions.json'
-        if extensions_file.exists():
-            loaded_exts = json.loads(extensions_file.read_text())
-            print(f"Extensions loaded by chrome hook: {[e.get('name') for e in loaded_exts]}")
-        else:
-            print("Warning: extensions.json not found")
+        loaded_exts = wait_for_extensions_metadata(chrome_dir, timeout_seconds=10)
+        print(
+            f"Extensions loaded by chrome hook: {[e.get('name') for e in loaded_exts]}"
+        )
+        ext_entry = next((e for e in loaded_exts if e.get("name") == "ublock"), None)
+        assert ext_entry, f"ublock not present in extensions metadata: {loaded_exts}"
+        ext_id = ext_entry.get("id")
+        assert ext_id, f"ublock extension id missing from metadata: {ext_entry}"
 
         # Get the unpacked extension ID - Chrome computes this from the path
-        unpacked_path = ext_data.get('unpacked_path', '')
+        unpacked_path = ext_data.get("unpacked_path", "")
         print(f"[test] Extension unpacked path: {unpacked_path}", flush=True)
         print("[test] Running puppeteer test script...", flush=True)
 
         try:
             # Step 3: Connect to Chromium and verify extension loads
-            # First use CDP to get all targets and find extension ID
-            test_script = f'''
+            # Use extension ID resolved from chrome session metadata.
+            test_script = f"""
 if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
 const puppeteer = require('puppeteer-core');
 
@@ -420,36 +430,8 @@ def test_extension_loads_in_chromium():
     // Wait for extension to initialize
     await new Promise(r => setTimeout(r, 500));
 
-    // Use CDP to get all targets including service workers
-    const pages = await browser.pages();
-    const page = pages[0] || await browser.newPage();
-    const client = await page.createCDPSession();
-
-    const {{ targetInfos }} = await client.send('Target.getTargets');
-    console.error('All CDP targets:');
-    targetInfos.forEach(t => console.error('  -', t.type, t.url.slice(0, 100)));
-
-    // Find any chrome-extension:// URLs
-    const extTargets = targetInfos.filter(t => t.url.startsWith('chrome-extension://'));
-    console.error('Extension targets:', extTargets.length);
-
-    // Filter out built-in extensions
-    const builtinIds = ['nkeimhogjdpnpccoofpliimaahmaaome', 'fignfifoniblkonapihmkfakmlgkbkcf',
-                       'ahfgeienlihckogmohjhadlkjgocpleb', 'mhjfbmdgcfjbbpaeojofohoefgiehjai'];
-    const customExts = extTargets.filter(t => {{
-        const extId = t.url.split('://')[1].split('/')[0];
-        return !builtinIds.includes(extId);
-    }});
-
-    if (customExts.length === 0) {{
-        console.log(JSON.stringify({{ loaded: false, error: 'No custom extension found via CDP' }}));
-        browser.disconnect();
-        return;
-    }}
-
-    // Get extension ID from first custom extension
-    const extId = customExts[0].url.split('://')[1].split('/')[0];
-    console.error('Found extension ID:', extId);
+    const extId = '{ext_id}';
+    console.error('Using extension ID from extensions metadata:', extId);
 
     // Try to load dashboard.html
     const newPage = await browser.newPage();
@@ -476,17 +458,17 @@ def test_extension_loads_in_chromium():
 
     browser.disconnect();
 }})();
-'''
-            script_path = tmpdir / 'test_ublock.js'
+"""
+            script_path = tmpdir / "test_ublock.js"
             script_path.write_text(test_script)
 
             result = subprocess.run(
-                ['node', str(script_path)],
+                ["node", str(script_path)],
                 cwd=str(tmpdir),
                 capture_output=True,
                 text=True,
                 env=env,
-                timeout=10
+                timeout=45,
             )
 
             print(f"stderr: {result.stderr}")
@@ -494,28 +476,22 @@ def test_extension_loads_in_chromium():
 
             assert result.returncode == 0, f"Test failed: {result.stderr}"
 
-            output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
+            output_lines = [
+                line
+                for line in result.stdout.strip().split("\n")
+                if line.startswith("{")
+            ]
             assert output_lines, f"No JSON output: {result.stdout}"
 
             test_result = json.loads(output_lines[-1])
-            assert test_result.get('loaded'), \
+            assert test_result.get("loaded"), (
                 f"uBlock extension should be loaded in Chromium. Result: {test_result}"
+            )
             print(f"Extension loaded successfully: {test_result}")
 
         finally:
-            # Clean up Chromium
-            try:
-                chrome_launch_process.send_signal(signal.SIGTERM)
-                chrome_launch_process.wait(timeout=5)
-            except:
-                pass
-            chrome_pid_file = chrome_dir / 'chrome.pid'
-            if chrome_pid_file.exists():
-                try:
-                    chrome_pid = int(chrome_pid_file.read_text().strip())
-                    os.kill(chrome_pid, signal.SIGKILL)
-                except (OSError, ValueError):
-                    pass
+            if chrome_launch_process:
+                kill_chromium_session(chrome_launch_process, chrome_dir)
 
 
 def test_blocks_ads_on_yahoo_com():
@@ -535,32 +511,39 @@ def test_blocks_ads_on_yahoo_com():
 
         # Set up isolated env with proper directory structure
         env_base = setup_test_env(tmpdir)
-        env_base['CHROME_HEADLESS'] = 'true'
+        env_base["CHROME_HEADLESS"] = "true"
 
         # ============================================================
         # STEP 1: BASELINE - Run WITHOUT extension, verify ads are NOT blocked
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 1: BASELINE TEST (no extension)")
-        print("="*60)
+        print("=" * 60)
 
-        personas_dir = Path(env_base['PERSONAS_DIR'])
+        personas_dir = Path(env_base["PERSONAS_DIR"])
 
         env_no_ext = env_base.copy()
-        env_no_ext['CHROME_EXTENSIONS_DIR'] = str(personas_dir / 'Default' / 'empty_extensions')
-        (personas_dir / 'Default' / 'empty_extensions').mkdir(parents=True, exist_ok=True)
+        env_no_ext["CHROME_EXTENSIONS_DIR"] = str(
+            personas_dir / "Default" / "empty_extensions"
+        )
+        (personas_dir / "Default" / "empty_extensions").mkdir(
+            parents=True, exist_ok=True
+        )
 
         # Launch baseline Chromium in crawls directory
-        baseline_crawl_id = 'baseline-no-ext'
-        baseline_crawl_dir = Path(env_base['CRAWL_DIR']) / baseline_crawl_id
+        baseline_crawl_id = "baseline-no-ext"
+        baseline_crawl_dir = Path(env_base["CRAWL_DIR"]) / baseline_crawl_id
         baseline_crawl_dir.mkdir(parents=True, exist_ok=True)
-        baseline_chrome_dir = baseline_crawl_dir / 'chrome'
-        env_no_ext['CRAWL_DIR'] = str(baseline_crawl_dir)
+        baseline_chrome_dir = baseline_crawl_dir / "chrome"
+        env_no_ext["CRAWL_DIR"] = str(baseline_crawl_dir)
         baseline_process = None
 
         try:
             baseline_process, baseline_cdp_url = launch_chromium_session(
-                env_no_ext, baseline_chrome_dir, baseline_crawl_id
+                env_no_ext,
+                baseline_chrome_dir,
+                baseline_crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             )
             print(f"Baseline Chromium launched: {baseline_cdp_url}")
 
@@ -571,47 +554,51 @@ def test_blocks_ads_on_yahoo_com():
                 baseline_cdp_url, TEST_URL, env_no_ext, tmpdir
             )
 
-            print(f"Baseline result: {baseline_result['adElementsVisible']} visible ads "
-                  f"(found {baseline_result['adElementsFound']} ad elements)")
+            print(
+                f"Baseline result: {baseline_result['adElementsVisible']} visible ads "
+                f"(found {baseline_result['adElementsFound']} ad elements)"
+            )
 
         finally:
             if baseline_process:
                 kill_chromium_session(baseline_process, baseline_chrome_dir)
 
         # Verify baseline shows ads ARE visible (not blocked)
-        if baseline_result['adElementsFound'] == 0:
+        if baseline_result["adElementsFound"] == 0:
             pytest.fail(
                 f"Baseline must find ad elements on {TEST_URL}, but found none. "
                 f"This test requires a real ad-heavy page."
             )
 
-        if baseline_result['adElementsVisible'] == 0:
+        if baseline_result["adElementsVisible"] == 0:
             pytest.fail(
                 f"Baseline must have visible ads on {TEST_URL}, but none were visible. "
                 f"This likely means another ad blocker is active or network-level blocking is in effect."
             )
 
-        print(f"\n✓ Baseline confirmed: {baseline_result['adElementsVisible']} visible ads without extension")
+        print(
+            f"\n✓ Baseline confirmed: {baseline_result['adElementsVisible']} visible ads without extension"
+        )
 
         # ============================================================
         # STEP 2: Install the uBlock extension
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 2: INSTALLING EXTENSION")
-        print("="*60)
+        print("=" * 60)
 
-        ext_dir = Path(env_base['CHROME_EXTENSIONS_DIR'])
+        ext_dir = Path(env_base["CHROME_EXTENSIONS_DIR"])
 
         result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
+            ["node", str(INSTALL_SCRIPT)],
             capture_output=True,
             text=True,
             env=env_base,
-            timeout=60
+            timeout=60,
         )
         assert result.returncode == 0, f"Extension install failed: {result.stderr}"
 
-        cache_file = ext_dir / 'ublock.extension.json'
+        cache_file = ext_dir / "ublock.extension.json"
         assert cache_file.exists(), "Extension cache not created"
         ext_data = json.loads(cache_file.read_text())
         print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
@@ -619,39 +606,45 @@ def test_blocks_ads_on_yahoo_com():
         # ============================================================
         # STEP 3: Run WITH extension, verify ads ARE blocked
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 3: TEST WITH EXTENSION")
-        print("="*60)
+        print("=" * 60)
 
         # Launch extension test Chromium in crawls directory
-        ext_crawl_id = 'test-with-ext'
-        ext_crawl_dir = Path(env_base['CRAWL_DIR']) / ext_crawl_id
+        ext_crawl_id = "test-with-ext"
+        ext_crawl_dir = Path(env_base["CRAWL_DIR"]) / ext_crawl_id
         ext_crawl_dir.mkdir(parents=True, exist_ok=True)
-        ext_chrome_dir = ext_crawl_dir / 'chrome'
-        env_base['CRAWL_DIR'] = str(ext_crawl_dir)
+        ext_chrome_dir = ext_crawl_dir / "chrome"
+        env_base["CRAWL_DIR"] = str(ext_crawl_dir)
         ext_process = None
 
         try:
             ext_process, ext_cdp_url = launch_chromium_session(
-                env_base, ext_chrome_dir, ext_crawl_id
+                env_base,
+                ext_chrome_dir,
+                ext_crawl_id,
+                timeout=CHROME_STARTUP_TIMEOUT_SECONDS,
             )
             print(f"Extension Chromium launched: {ext_cdp_url}")
 
-            # Check that extension was loaded
-            extensions_file = ext_chrome_dir / 'extensions.json'
-            if extensions_file.exists():
-                loaded_exts = json.loads(extensions_file.read_text())
-                print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
-
-                # Verify extension has ID and is initialized
-                if loaded_exts and loaded_exts[0].get('id'):
-                    ext_id = loaded_exts[0]['id']
-                    print(f"Extension ID: {ext_id}")
-
-                    # Visit the extension dashboard to ensure it's fully loaded
-                    print("Visiting extension dashboard to verify initialization...")
-                    dashboard_script = f'''
-const puppeteer = require('{env_base['NODE_MODULES_DIR']}/puppeteer-core');
+            loaded_exts = wait_for_extensions_metadata(
+                ext_chrome_dir, timeout_seconds=10
+            )
+            print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
+            ext_entry = next(
+                (e for e in loaded_exts if e.get("name") == "ublock"), None
+            )
+            assert ext_entry, (
+                f"ublock not present in extensions metadata: {loaded_exts}"
+            )
+            ext_id = ext_entry.get("id")
+            assert ext_id, f"ublock extension id missing from metadata: {ext_entry}"
+            print(f"Extension ID: {ext_id}")
+
+            # Visit the extension dashboard to ensure it's fully loaded
+            print("Visiting extension dashboard to verify initialization...")
+            dashboard_script = f"""
+const puppeteer = require('{env_base["NODE_MODULES_DIR"]}/puppeteer-core');
 (async () => {{
     const browser = await puppeteer.connect({{
         browserWSEndpoint: '{ext_cdp_url}',
@@ -664,22 +657,27 @@ def test_blocks_ads_on_yahoo_com():
     await page.close();
     browser.disconnect();
 }})();
-'''
-                    dash_script_path = tmpdir / 'check_dashboard.js'
-                    dash_script_path.write_text(dashboard_script)
-                    subprocess.run(['node', str(dash_script_path)], capture_output=True, timeout=15, env=env_base)
+"""
+            dash_script_path = tmpdir / "check_dashboard.js"
+            dash_script_path.write_text(dashboard_script)
+            subprocess.run(
+                ["node", str(dash_script_path)],
+                capture_output=True,
+                timeout=15,
+                env=env_base,
+            )
 
             # Wait longer for extension to fully initialize filters
             # On first run, uBlock needs to download filter lists which can take 10-15 seconds
             print("Waiting for uBlock filter lists to download and initialize...")
             time.sleep(15)
 
-            ext_result = check_ad_blocking(
-                ext_cdp_url, TEST_URL, env_base, tmpdir
-            )
+            ext_result = check_ad_blocking(ext_cdp_url, TEST_URL, env_base, tmpdir)
 
-            print(f"Extension result: {ext_result['adElementsVisible']} visible ads "
-                  f"(found {ext_result['adElementsFound']} ad elements)")
+            print(
+                f"Extension result: {ext_result['adElementsVisible']} visible ads "
+                f"(found {ext_result['adElementsFound']} ad elements)"
+            )
 
         finally:
             if ext_process:
@@ -688,38 +686,51 @@ def test_blocks_ads_on_yahoo_com():
         # ============================================================
         # STEP 4: Compare results
         # ============================================================
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print("STEP 4: COMPARISON")
-        print("="*60)
-        print(f"Baseline (no extension): {baseline_result['adElementsVisible']} visible ads")
+        print("=" * 60)
+        print(
+            f"Baseline (no extension): {baseline_result['adElementsVisible']} visible ads"
+        )
         print(f"With extension: {ext_result['adElementsVisible']} visible ads")
 
         # Calculate reduction in visible ads
-        ads_blocked = baseline_result['adElementsVisible'] - ext_result['adElementsVisible']
-        reduction_percent = (ads_blocked / baseline_result['adElementsVisible'] * 100) if baseline_result['adElementsVisible'] > 0 else 0
+        ads_blocked = (
+            baseline_result["adElementsVisible"] - ext_result["adElementsVisible"]
+        )
+        reduction_percent = (
+            (ads_blocked / baseline_result["adElementsVisible"] * 100)
+            if baseline_result["adElementsVisible"] > 0
+            else 0
+        )
 
-        print(f"Reduction: {ads_blocked} fewer visible ads ({reduction_percent:.0f}% reduction)")
+        print(
+            f"Reduction: {ads_blocked} fewer visible ads ({reduction_percent:.0f}% reduction)"
+        )
 
         # Extension should significantly reduce visible ads
-        assert ext_result['adElementsVisible'] < baseline_result['adElementsVisible'], \
-            f"uBlock should reduce visible ads.\n" \
-            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n" \
-            f"With extension: {ext_result['adElementsVisible']} visible ads\n" \
+        assert ext_result["adElementsVisible"] < baseline_result["adElementsVisible"], (
+            f"uBlock should reduce visible ads.\n"
+            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n"
+            f"With extension: {ext_result['adElementsVisible']} visible ads\n"
             f"Expected fewer ads with extension."
+        )
 
         # Ensure uBlock actually blocks at least some ad/track requests
-        assert ext_result['blockedRequests'] > 0, \
+        assert ext_result["blockedRequests"] > 0, (
             "uBlock should block at least one ad/track request on yahoo.com"
+        )
 
         # Extension should block at least 20% of ads (was consistently blocking 5-13% without proper init time)
-        assert reduction_percent >= 20, \
-            f"uBlock should block at least 20% of ads.\n" \
-            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n" \
-            f"With extension: {ext_result['adElementsVisible']} visible ads\n" \
-            f"Reduction: only {reduction_percent:.0f}% (expected at least 20%)\n" \
+        assert reduction_percent >= 20, (
+            f"uBlock should block at least 20% of ads.\n"
+            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n"
+            f"With extension: {ext_result['adElementsVisible']} visible ads\n"
+            f"Reduction: only {reduction_percent:.0f}% (expected at least 20%)\n"
             f"Note: Filter lists must be downloaded on first run (takes ~15s)"
+        )
 
-        print(f"\n✓ SUCCESS: uBlock correctly blocks ads!")
+        print("\n✓ SUCCESS: uBlock correctly blocks ads!")
         print(f"  - Baseline: {baseline_result['adElementsVisible']} visible ads")
         print(f"  - With extension: {ext_result['adElementsVisible']} visible ads")
         print(f"  - Blocked: {ads_blocked} ads ({reduction_percent:.0f}% reduction)")
diff --git a/abx_plugins/plugins/wget/on_Crawl__10_wget_install.py b/abx_plugins/plugins/wget/on_Crawl__10_wget_install.py
index 8e399a6..2c9149c 100755
--- a/abx_plugins/plugins/wget/on_Crawl__10_wget_install.py
+++ b/abx_plugins/plugins/wget/on_Crawl__10_wget_install.py
@@ -15,24 +15,26 @@
 from pathlib import Path
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
 # Read config from environment (already validated by JSONSchema)
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
+
 def get_env_int(name: str, default: int = 0) -> int:
     try:
         return int(get_env(name, str(default)))
@@ -42,13 +44,13 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def output_binary(name: str, binproviders: str):
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
     record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     print(json.dumps(record))
 
@@ -58,8 +60,8 @@ def output_machine_config(config: dict):
     if not config:
         return
     record = {
-        'type': 'Machine',
-        'config': config,
+        "type": "Machine",
+        "config": config,
     }
     print(json.dumps(record))
 
@@ -69,10 +71,9 @@ def main():
     errors = []
 
     # Get config values
-    wget_enabled = get_env_bool('WGET_ENABLED', True)
-    wget_save_warc = get_env_bool('WGET_SAVE_WARC', True)
-    wget_timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    wget_binary = get_env('WGET_BINARY', 'wget')
+    wget_enabled = get_env_bool("WGET_ENABLED", True)
+    wget_timeout = get_env_int("WGET_TIMEOUT") or get_env_int("TIMEOUT", 60)
+    wget_binary = get_env("WGET_BINARY", "wget")
 
     # Compute derived values (USE_WGET for backward compatibility)
     use_wget = wget_enabled
@@ -86,13 +87,15 @@ def main():
         )
 
     if use_wget:
-        output_binary(name='wget', binproviders='apt,brew,pip,env')
+        output_binary(name="wget", binproviders="apt,brew,pip,env")
 
     # Output computed config patch as JSONL
-    output_machine_config({
-        'USE_WGET': use_wget,
-        'WGET_BINARY': wget_binary,
-    })
+    output_machine_config(
+        {
+            "USE_WGET": use_wget,
+            "WGET_BINARY": wget_binary,
+        }
+    )
 
     for warning in warnings:
         print(f"WARNING:{warning}", file=sys.stderr)
@@ -104,5 +107,5 @@ def main():
     sys.exit(1 if errors else 0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/wget/on_Snapshot__06_wget.bg.py b/abx_plugins/plugins/wget/on_Snapshot__06_wget.bg.py
index 90f7387..d6fb72d 100755
--- a/abx_plugins/plugins/wget/on_Snapshot__06_wget.bg.py
+++ b/abx_plugins/plugins/wget/on_Snapshot__06_wget.bg.py
@@ -35,23 +35,25 @@
 
 
 # Extractor metadata
-PLUGIN_NAME = 'wget'
-BIN_NAME = 'wget'
-BIN_PROVIDERS = 'apt,brew,env'
+PLUGIN_NAME = "wget"
+BIN_NAME = "wget"
+BIN_PROVIDERS = "apt,brew,env"
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
-def get_env(name: str, default: str = '') -> str:
+
+
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -65,7 +67,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -77,31 +79,33 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
         return default if default is not None else []
 
 
-STATICFILE_DIR = '../staticfile'
+STATICFILE_DIR = "../staticfile"
+
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
     staticfile_dir = Path(STATICFILE_DIR)
     if not staticfile_dir.exists():
         return False
-    stdout_log = staticfile_dir / 'stdout.log'
+    stdout_log = staticfile_dir / "stdout.log"
     if not stdout_log.exists():
         return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
+    for line in stdout_log.read_text(errors="ignore").splitlines():
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             record = json.loads(line)
         except json.JSONDecodeError:
             continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
+        if (
+            record.get("type") == "ArchiveResult"
+            and record.get("status") == "succeeded"
+        ):
             return True
     return False
 
 
-
-
 def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
     """
     Archive URL using wget.
@@ -109,39 +113,45 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
     Returns: (success, output_path, error_message)
     """
     # Get config from env (with WGET_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    user_agent = get_env('WGET_USER_AGENT') or get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-    check_ssl = get_env_bool('WGET_CHECK_SSL_VALIDITY', True) if get_env('WGET_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('WGET_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    wget_args = get_env_array('WGET_ARGS', [])
-    wget_args_extra = get_env_array('WGET_ARGS_EXTRA', [])
+    timeout = get_env_int("WGET_TIMEOUT") or get_env_int("TIMEOUT", 60)
+    user_agent = get_env("WGET_USER_AGENT") or get_env(
+        "USER_AGENT", "Mozilla/5.0 (compatible; ArchiveBox/1.0)"
+    )
+    check_ssl = (
+        get_env_bool("WGET_CHECK_SSL_VALIDITY", True)
+        if get_env("WGET_CHECK_SSL_VALIDITY")
+        else get_env_bool("CHECK_SSL_VALIDITY", True)
+    )
+    cookies_file = get_env("WGET_COOKIES_FILE") or get_env("COOKIES_FILE", "")
+    wget_args = get_env_array("WGET_ARGS", [])
+    wget_args_extra = get_env_array("WGET_ARGS_EXTRA", [])
 
     # Feature toggles
-    warc_enabled = get_env_bool('WGET_WARC_ENABLED', True)
+    warc_enabled = get_env_bool("WGET_WARC_ENABLED", True)
 
     # Build wget command (later options take precedence)
     cmd = [
         binary,
         *wget_args,
-        f'--timeout={timeout}',
+        f"--timeout={timeout}",
     ]
 
     if user_agent:
-        cmd.append(f'--user-agent={user_agent}')
+        cmd.append(f"--user-agent={user_agent}")
 
     if warc_enabled:
-        warc_dir = Path('warc')
+        warc_dir = Path("warc")
         warc_dir.mkdir(exist_ok=True)
         warc_path = warc_dir / str(int(datetime.now(timezone.utc).timestamp()))
-        cmd.append(f'--warc-file={warc_path}')
+        cmd.append(f"--warc-file={warc_path}")
     else:
-        cmd.append('--timestamping')
+        cmd.append("--timestamping")
 
     if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--load-cookies', cookies_file])
+        cmd.extend(["--load-cookies", cookies_file])
 
     if not check_ssl:
-        cmd.extend(['--no-check-certificate', '--no-hsts'])
+        cmd.extend(["--no-check-certificate", "--no-hsts"])
 
     if wget_args_extra:
         cmd.extend(wget_args_extra)
@@ -159,60 +169,67 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
 
         # Find downloaded files
         downloaded_files = [
-            f for f in Path('.').rglob('*')
-            if f.is_file() and f.name != '.gitkeep' and not str(f).startswith('warc/')
+            f
+            for f in Path(".").rglob("*")
+            if f.is_file() and f.name != ".gitkeep" and not str(f).startswith("warc/")
         ]
 
         if not downloaded_files:
             if result.returncode != 0:
-                return False, None, f'wget failed (exit={result.returncode})'
-            return False, None, 'No files downloaded'
+                return False, None, f"wget failed (exit={result.returncode})"
+            return False, None, "No files downloaded"
 
         # Find main HTML file
         html_files = [
-            f for f in downloaded_files
-            if re.search(r'\.[Ss]?[Hh][Tt][Mm][Ll]?$', str(f))
+            f
+            for f in downloaded_files
+            if re.search(r"\.[Ss]?[Hh][Tt][Mm][Ll]?$", str(f))
         ]
         output_path = str(html_files[0]) if html_files else str(downloaded_files[0])
 
-        # Parse download stats from wget output
-        stderr_text = (result.stderr or '')
-        output_tail = stderr_text.strip().split('\n')[-3:] if stderr_text else []
-        files_count = len(downloaded_files)
-
-        return True, output_path, ''
+        return True, output_path, ""
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout * 2} seconds'
+        return False, None, f"Timed out after {timeout * 2} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to archive')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to archive")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Archive a URL using wget."""
 
     output = None
-    status = 'failed'
-    error = ''
+    error = ""
 
     try:
         # Check if wget is enabled
-        if not get_env_bool('WGET_ENABLED', True):
-            print('Skipping wget (WGET_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("WGET_ENABLED", True):
+            print("Skipping wget (WGET_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Check if staticfile extractor already handled this (permanent skip)
         if has_staticfile_output():
-            print('Skipping wget - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
+            print(
+                "Skipping wget - staticfile extractor already downloaded this",
+                file=sys.stderr,
+            )
+            print(
+                json.dumps(
+                    {
+                        "type": "ArchiveResult",
+                        "status": "skipped",
+                        "output_str": "staticfile already exists",
+                    }
+                )
+            )
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('WGET_BINARY', 'wget')
+        binary = get_env("WGET_BINARY", "wget")
 
         # Run extraction
         success, output, error = save_wget(url, binary)
@@ -220,22 +237,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/wget/tests/test_wget.py b/abx_plugins/plugins/wget/tests/test_wget.py
index f7d4ca8..57eba3d 100644
--- a/abx_plugins/plugins/wget/tests/test_wget.py
+++ b/abx_plugins/plugins/wget/tests/test_wget.py
@@ -26,10 +26,19 @@
 
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
-WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.*'))
-BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Binary__install_using_brew_provider.py'
-APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Binary__install_using_apt_provider.py'
-TEST_URL = 'https://example.com'
+WGET_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_wget.*"))
+BREW_HOOK = next((PLUGINS_ROOT / "brew").glob("on_Binary__*_brew_install.py"), None)
+APT_HOOK = next((PLUGINS_ROOT / "apt").glob("on_Binary__*_apt_install.py"), None)
+TEST_URL = "https://example.com"
+
+
+def _provider_runtime_unavailable(proc: subprocess.CompletedProcess[str]) -> bool:
+    combined = f"{proc.stdout}\n{proc.stderr}"
+    return (
+        "BinProviderOverrides" in combined
+        or "PydanticUndefinedAnnotation" in combined
+        or "not fully defined" in combined
+    )
 
 
 def test_hook_script_exists():
@@ -39,9 +48,18 @@ def test_hook_script_exists():
 
 def test_verify_deps_with_abx_pkg():
     """Verify wget is available via abx-pkg."""
-    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
+
+    try:
+        apt_provider = AptProvider()
+        brew_provider = BrewProvider()
+        env_provider = EnvProvider()
+    except Exception as exc:
+        pytest.fail(f"System package providers unavailable in this runtime: {exc}")
 
-    wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
+    wget_binary = Binary(
+        name="wget", binproviders=[apt_provider, brew_provider, env_provider]
+    )
     wget_loaded = wget_binary.load()
 
     if wget_loaded and wget_loaded.abspath:
@@ -56,43 +74,58 @@ def test_reports_missing_dependency_when_not_installed():
         tmpdir = Path(tmpdir)
 
         # Run with empty PATH so binary won't be found
-        env = {'PATH': '/nonexistent', 'HOME': str(tmpdir)}
+        env = {"PATH": "/nonexistent", "HOME": str(tmpdir)}
 
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test123",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            env=env
+            env=env,
         )
 
         # Missing binary is a transient error - should exit 1 with no JSONL
         assert result.returncode == 1, "Should exit 1 when dependency missing"
 
         # Should NOT emit JSONL (transient error - will be retried)
-        jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                      if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, "Should not emit JSONL for transient error (missing binary)"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            "Should not emit JSONL for transient error (missing binary)"
+        )
 
         # Should log error to stderr
-        assert 'wget' in result.stderr.lower() or 'error' in result.stderr.lower(), \
+        assert "wget" in result.stderr.lower() or "error" in result.stderr.lower(), (
             "Should report error in stderr"
+        )
 
 
 def test_can_install_wget_via_provider():
     """Test that wget can be installed via brew/apt provider hooks."""
 
     # Determine which provider to use
-    if shutil.which('brew'):
+    if shutil.which("brew"):
         provider_hook = BREW_HOOK
-        provider_name = 'brew'
-    elif shutil.which('apt-get'):
+        provider_name = "brew"
+    elif shutil.which("apt-get"):
         provider_hook = APT_HOOK
-        provider_name = 'apt'
+        provider_name = "apt"
     else:
-        pass
+        pytest.fail("Neither brew nor apt-get is available on this system")
 
-    assert provider_hook.exists(), f"Provider hook not found: {provider_hook}"
+    assert provider_hook and provider_hook.exists(), (
+        f"Provider hook not found: {provider_hook}"
+    )
 
     # Test installation via provider hook
     binary_id = str(uuid.uuid4())
@@ -102,41 +135,51 @@ def test_can_install_wget_via_provider():
         [
             sys.executable,
             str(provider_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'wget',
-            '--binproviders', 'apt,brew,env'
+            "--binary-id",
+            binary_id,
+            "--machine-id",
+            machine_id,
+            "--name",
+            "wget",
+            "--binproviders",
+            "apt,brew,env",
         ],
         capture_output=True,
         text=True,
-        timeout=300  # Installation can take time
+        timeout=300,  # Installation can take time
     )
 
+    if result.returncode != 0 and _provider_runtime_unavailable(result):
+        pytest.fail("Provider hook runtime unavailable in this environment")
+
     # Should succeed (wget installs successfully or is already installed)
     assert result.returncode == 0, f"{provider_name} install failed: {result.stderr}"
 
     # Should output Binary JSONL record
-    assert 'Binary' in result.stdout or 'wget' in result.stderr, \
+    assert "Binary" in result.stdout or "wget" in result.stderr, (
         f"Should output installation info: stdout={result.stdout}, stderr={result.stderr}"
+    )
 
     # Parse JSONL if present
     if result.stdout.strip():
         pass
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             pass
             try:
                 record = json.loads(line)
-                if record.get('type') == 'Binary':
-                    assert record['name'] == 'wget'
-                    assert record['binprovider'] in ['brew', 'apt']
-                    assert record['abspath'], "Should have binary path"
-                    assert Path(record['abspath']).exists(), f"Binary should exist at {record['abspath']}"
+                if record.get("type") == "Binary":
+                    assert record["name"] == "wget"
+                    assert record["binprovider"] in ["brew", "apt", "env"]
+                    assert record["abspath"], "Should have binary path"
+                    assert Path(record["abspath"]).exists(), (
+                        f"Binary should exist at {record['abspath']}"
+                    )
                     break
             except json.JSONDecodeError:
                 continue
 
     # Verify wget is now available
-    result = subprocess.run(['which', 'wget'], capture_output=True, text=True)
+    result = subprocess.run(["which", "wget"], capture_output=True, text=True)
     assert result.returncode == 0, "wget should be available after installation"
 
 
@@ -144,25 +187,34 @@ def test_archives_example_com():
     """Test full workflow: ensure wget installed then archive example.com."""
 
     # First ensure wget is installed via provider
-    if shutil.which('brew'):
+    if shutil.which("brew"):
         provider_hook = BREW_HOOK
-    elif shutil.which('apt-get'):
+    elif shutil.which("apt-get"):
         provider_hook = APT_HOOK
     else:
-        pass
+        pytest.fail("Neither brew nor apt-get is available on this system")
+
+    assert provider_hook and provider_hook.exists(), (
+        f"Provider hook not found: {provider_hook}"
+    )
 
     # Run installation (idempotent - will succeed if already installed)
     install_result = subprocess.run(
         [
             sys.executable,
             str(provider_hook),
-            '--dependency-id', str(uuid.uuid4()),
-            '--bin-name', 'wget',
-            '--bin-providers', 'apt,brew,env'
+            "--binary-id",
+            str(uuid.uuid4()),
+            "--machine-id",
+            str(uuid.uuid4()),
+            "--name",
+            "wget",
+            "--binproviders",
+            "apt,brew,env",
         ],
         capture_output=True,
         text=True,
-        timeout=300
+        timeout=300,
     )
 
     if install_result.returncode != 0:
@@ -171,59 +223,83 @@ def test_archives_example_com():
     # Now test archiving
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env["SNAP_DIR"] = str(tmpdir)
 
         # Run wget extraction
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=120
+            env=env,
+            timeout=120,
         )
 
         assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
-        # Verify files were downloaded
-        downloaded_files = list(tmpdir.rglob('*.html')) + list(tmpdir.rglob('*.htm'))
-        assert len(downloaded_files) > 0, "No HTML files downloaded"
+        # Verify files were downloaded to wget output directory.
+        output_root = tmpdir / "wget"
+        assert output_root.exists(), "wget output directory was not created"
+
+        downloaded_files = [f for f in output_root.rglob("*") if f.is_file()]
+        assert downloaded_files, "No files downloaded"
+
+        # Try the emitted output path first, then fallback to downloaded files.
+        output_path = (output_root / result_json.get("output_str", "")).resolve()
+        candidate_files = [output_path] if output_path.is_file() else []
+        candidate_files.extend(downloaded_files)
 
-        # Find main HTML file (should contain example.com)
         main_html = None
-        for html_file in downloaded_files:
-            content = html_file.read_text(errors='ignore')
-            if 'example domain' in content.lower():
-                main_html = html_file
+        for candidate in candidate_files:
+            content = candidate.read_text(errors="ignore")
+            if "example domain" in content.lower():
+                main_html = candidate
                 break
 
-        assert main_html is not None, "Could not find main HTML file with example.com content"
+        assert main_html is not None, (
+            "Could not find downloaded file containing example.com content"
+        )
 
-        # Verify HTML content contains REAL example.com text
-        html_content = main_html.read_text(errors='ignore')
-        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('this domain' in html_content.lower() or
-                'illustrative examples' in html_content.lower()), \
-            "Missing example.com description text"
-        assert ('iana' in html_content.lower() or
-                'more information' in html_content.lower()), \
-            "Missing IANA reference"
+        # Verify page content contains REAL example.com text.
+        html_content = main_html.read_text(errors="ignore")
+        assert len(html_content) > 200, (
+            f"HTML content too short: {len(html_content)} bytes"
+        )
+        assert "example domain" in html_content.lower(), (
+            "Missing 'Example Domain' in HTML"
+        )
+        assert (
+            "this domain" in html_content.lower()
+            or "illustrative examples" in html_content.lower()
+        ), "Missing example.com description text"
+        assert (
+            "iana" in html_content.lower() or "more information" in html_content.lower()
+        ), "Missing IANA reference"
 
 
 def test_config_save_wget_false_skips():
@@ -234,33 +310,50 @@ def test_config_save_wget_false_skips():
 
         # Set WGET_ENABLED=False
         env = os.environ.copy()
-        env['WGET_ENABLED'] = 'False'
+        env["WGET_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Should exit 0 when feature disabled
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - no JSONL emission, just logs to stderr
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
 def test_config_save_warc():
     """Test that WGET_SAVE_WARC=True creates WARC files."""
 
     # Ensure wget is available
-    if not shutil.which('wget'):
+    if not shutil.which("wget"):
         pass
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -268,25 +361,34 @@ def test_config_save_warc():
 
         # Set WGET_SAVE_WARC=True explicitly
         env = os.environ.copy()
-        env['WGET_SAVE_WARC'] = 'True'
-        env['SNAP_DIR'] = str(tmpdir)
+        env["WGET_SAVE_WARC"] = "True"
+        env["SNAP_DIR"] = str(tmpdir)
 
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testwarc'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testwarc",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=120
+            timeout=120,
         )
 
         if result.returncode == 0:
             # Look for WARC files in warc/ subdirectory
-            warc_dir = tmpdir / 'wget' / 'warc'
+            warc_dir = tmpdir / "wget" / "warc"
             if warc_dir.exists():
-                warc_files = list(warc_dir.rglob('*'))
+                warc_files = list(warc_dir.rglob("*"))
                 warc_files = [f for f in warc_files if f.is_file()]
-                assert len(warc_files) > 0, "WARC file not created when WGET_SAVE_WARC=True"
+                assert len(warc_files) > 0, (
+                    "WARC file not created when WGET_SAVE_WARC=True"
+                )
 
 
 def test_staticfile_present_skips():
@@ -295,26 +397,35 @@ def test_staticfile_present_skips():
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
         env = os.environ.copy()
-        env['SNAP_DIR'] = str(tmpdir)
+        env["SNAP_DIR"] = str(tmpdir)
 
         # Create directory structure like real ArchiveBox:
         # tmpdir/
         #   staticfile/  <- staticfile extractor output
         #   wget/         <- wget extractor runs here, looks for ../staticfile
-        staticfile_dir = tmpdir / 'staticfile'
+        staticfile_dir = tmpdir / "staticfile"
         staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
+        (staticfile_dir / "stdout.log").write_text(
+            '{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n'
+        )
 
-        wget_dir = tmpdir / 'wget'
+        wget_dir = tmpdir / "wget"
         wget_dir.mkdir()
 
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'teststatic'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "teststatic",
+            ],
             cwd=wget_dir,  # Run from wget subdirectory
             capture_output=True,
             text=True,
             timeout=30,
-            env=env
+            env=env,
         )
 
         # Should skip with permanent skip JSONL
@@ -322,27 +433,31 @@ def test_staticfile_present_skips():
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should emit ArchiveResult JSONL for permanent skip"
-        assert result_json['status'] == 'skipped', f"Should have status='skipped': {result_json}"
-        assert 'staticfile' in result_json.get('output_str', '').lower(), "Should mention staticfile in output_str"
+        assert result_json["status"] == "skipped", (
+            f"Should have status='skipped': {result_json}"
+        )
+        assert "staticfile" in result_json.get("output_str", "").lower(), (
+            "Should mention staticfile in output_str"
+        )
 
 
 def test_handles_404_gracefully():
     """Test that wget fails gracefully on 404."""
 
-    if not shutil.which('wget'):
+    if not shutil.which("wget"):
         pass
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -350,24 +465,35 @@ def test_handles_404_gracefully():
 
         # Try to download non-existent page
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', 'https://example.com/nonexistent-page-404', '--snapshot-id', 'test404'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                "https://example.com/nonexistent-page-404",
+                "--snapshot-id",
+                "test404",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=60
+            timeout=60,
         )
 
         # Should fail
         assert result.returncode != 0, "Should fail on 404"
         combined = result.stdout + result.stderr
-        assert '404' in combined or 'Not Found' in combined or 'No files downloaded' in combined, \
-            "Should report 404 or no files downloaded"
+        assert (
+            "404" in combined
+            or "Not Found" in combined
+            or "No files downloaded" in combined
+            or "exit=8" in combined
+        ), "Should report 404 or no files downloaded"
 
 
 def test_config_timeout_honored():
     """Test that WGET_TIMEOUT config is respected."""
 
-    if not shutil.which('wget'):
+    if not shutil.which("wget"):
         pass
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -375,16 +501,23 @@ def test_config_timeout_honored():
 
         # Set very short timeout
         env = os.environ.copy()
-        env['WGET_TIMEOUT'] = '5'
+        env["WGET_TIMEOUT"] = "5"
 
         # This should still succeed for example.com (it's fast)
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
         # Verify it completed (success or fail, but didn't hang)
@@ -394,7 +527,7 @@ def test_config_timeout_honored():
 def test_config_user_agent():
     """Test that WGET_USER_AGENT config is used."""
 
-    if not shutil.which('wget'):
+    if not shutil.which("wget"):
         pass
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -402,36 +535,45 @@ def test_config_user_agent():
 
         # Set custom user agent
         env = os.environ.copy()
-        env['WGET_USER_AGENT'] = 'TestBot/1.0'
+        env["WGET_USER_AGENT"] = "TestBot/1.0"
 
         result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testua'],
+            [
+                sys.executable,
+                str(WGET_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "testua",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=120
+            timeout=120,
         )
 
         # Should succeed (example.com doesn't block)
         if result.returncode == 0:
             # Parse clean JSONL output
             result_json = None
-            for line in result.stdout.strip().split('\n'):
+            for line in result.stdout.strip().split("\n"):
                 line = line.strip()
-                if line.startswith('{'):
+                if line.startswith("{"):
                     pass
                     try:
                         record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
+                        if record.get("type") == "ArchiveResult":
                             result_json = record
                             break
                     except json.JSONDecodeError:
                         pass
 
             assert result_json, "Should have ArchiveResult JSONL output"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+            assert result_json["status"] == "succeeded", (
+                f"Should succeed: {result_json}"
+            )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/abx_plugins/plugins/ytdlp/on_Crawl__15_ytdlp_install.py b/abx_plugins/plugins/ytdlp/on_Crawl__15_ytdlp_install.py
index 9b83772..2e6e714 100755
--- a/abx_plugins/plugins/ytdlp/on_Crawl__15_ytdlp_install.py
+++ b/abx_plugins/plugins/ytdlp/on_Crawl__15_ytdlp_install.py
@@ -13,65 +13,69 @@
 import os
 import sys
 from pathlib import Path
+from typing import Any
 
 PLUGIN_DIR = Path(__file__).parent.name
-CRAWL_DIR = Path(os.environ.get('CRAWL_DIR', '.')).resolve()
+CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve()
 OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
+
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
 
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
+def output_binary(
+    name: str, binproviders: str, overrides: dict[str, Any] | None = None
+) -> None:
     """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
+    machine_id = os.environ.get("MACHINE_ID", "")
 
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
+    record: dict[str, Any] = {
+        "type": "Binary",
+        "name": name,
+        "binproviders": binproviders,
+        "machine_id": machine_id,
     }
     if overrides:
-        record['overrides'] = overrides
+        record["overrides"] = overrides
     print(json.dumps(record))
 
 
 def main():
-    ytdlp_enabled = get_env_bool('YTDLP_ENABLED', True)
+    ytdlp_enabled = get_env_bool("YTDLP_ENABLED", True)
 
     if not ytdlp_enabled:
         sys.exit(0)
 
     output_binary(
-        name='yt-dlp',
-        binproviders='pip,brew,apt,env',
-        overrides={'pip': {'packages': ['yt-dlp[default]']}},
+        name="yt-dlp",
+        binproviders="pip,brew,apt,env",
+        overrides={"pip": {"packages": ["yt-dlp[default]"]}},
     )
 
-    # Node.js (required by several JS-based extractors, declared here per legacy binaries.jsonl)
+    # Node.js (required by several JS-based extractors)
     output_binary(
-        name='node',
-        binproviders='apt,brew,env',
-        overrides={'apt': {'packages': ['nodejs']}},
+        name="node",
+        binproviders="apt,brew,env",
+        overrides={"apt": {"packages": ["nodejs"]}},
     )
 
     # ffmpeg (used by media extraction)
-    output_binary(name='ffmpeg', binproviders='apt,brew,env')
+    output_binary(name="ffmpeg", binproviders="apt,brew,env")
 
     sys.exit(0)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py b/abx_plugins/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py
index 4dfbcad..a183eb5 100755
--- a/abx_plugins/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py
+++ b/abx_plugins/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py
@@ -34,21 +34,21 @@
 
 
 PLUGIN_DIR = Path(__file__).resolve().parent.name
-SNAP_DIR = Path(os.environ.get('SNAP_DIR', '.')).resolve()
+SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve()
 OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 os.chdir(OUTPUT_DIR)
 
 
-def get_env(name: str, default: str = '') -> str:
+def get_env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
 
 
 def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
+    val = get_env(name, "").lower()
+    if val in ("true", "1", "yes", "on"):
         return True
-    if val in ('false', '0', 'no', 'off'):
+    if val in ("false", "0", "no", "off"):
         return False
     return default
 
@@ -62,7 +62,7 @@ def get_env_int(name: str, default: int = 0) -> int:
 
 def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
     """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
+    val = get_env(name, "")
     if not val:
         return default if default is not None else []
     try:
@@ -74,25 +74,29 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
         return default if default is not None else []
 
 
-STATICFILE_DIR = '../staticfile'
+STATICFILE_DIR = "../staticfile"
+
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
     staticfile_dir = Path(STATICFILE_DIR)
     if not staticfile_dir.exists():
         return False
-    stdout_log = staticfile_dir / 'stdout.log'
+    stdout_log = staticfile_dir / "stdout.log"
     if not stdout_log.exists():
         return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
+    for line in stdout_log.read_text(errors="ignore").splitlines():
         line = line.strip()
-        if not line.startswith('{'):
+        if not line.startswith("{"):
             continue
         try:
             record = json.loads(line)
         except json.JSONDecodeError:
             continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
+        if (
+            record.get("type") == "ArchiveResult"
+            and record.get("status") == "succeeded"
+        ):
             return True
     return False
 
@@ -104,42 +108,46 @@ def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]:
     Returns: (success, output_path, error_message)
     """
     # Get config from env (with YTDLP_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('YTDLP_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('YTDLP_CHECK_SSL_VALIDITY', True) if get_env('YTDLP_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('YTDLP_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    max_size = get_env('YTDLP_MAX_SIZE', '750m')
-    node_binary = get_env('YTDLP_NODE_BINARY') or get_env('NODE_BINARY', 'node')
-    ytdlp_args = get_env_array('YTDLP_ARGS', [])
-    ytdlp_args_extra = get_env_array('YTDLP_ARGS_EXTRA', [])
+    timeout = get_env_int("YTDLP_TIMEOUT") or get_env_int("TIMEOUT", 3600)
+    check_ssl = (
+        get_env_bool("YTDLP_CHECK_SSL_VALIDITY", True)
+        if get_env("YTDLP_CHECK_SSL_VALIDITY")
+        else get_env_bool("CHECK_SSL_VALIDITY", True)
+    )
+    cookies_file = get_env("YTDLP_COOKIES_FILE") or get_env("COOKIES_FILE", "")
+    max_size = get_env("YTDLP_MAX_SIZE", "750m")
+    node_binary = get_env("YTDLP_NODE_BINARY") or get_env("NODE_BINARY", "node")
+    ytdlp_args = get_env_array("YTDLP_ARGS", [])
+    ytdlp_args_extra = get_env_array("YTDLP_ARGS_EXTRA", [])
 
     # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path('.')
+    output_dir = Path(".")
 
     # Build command (later options take precedence)
     cmd = [
         binary,
         *ytdlp_args,
         # Format with max_size limit (appended after YTDLP_ARGS so it can be overridden by YTDLP_ARGS_EXTRA)
-        f'--format=(bv*+ba/b)[filesize<={max_size}][filesize_approx<=?{max_size}]/(bv*+ba/b)',
-        f'--js-runtimes=node:{node_binary}',
+        f"--format=(bv*+ba/b)[filesize<={max_size}][filesize_approx<=?{max_size}]/(bv*+ba/b)",
+        f"--js-runtimes=node:{node_binary}",
     ]
 
     if not check_ssl:
-        cmd.append('--no-check-certificate')
+        cmd.append("--no-check-certificate")
 
     if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--cookies', cookies_file])
+        cmd.extend(["--cookies", cookies_file])
 
     if ytdlp_args_extra:
         cmd.extend(ytdlp_args_extra)
 
-    if '--newline' not in cmd:
-        cmd.append('--newline')
+    if "--newline" not in cmd:
+        cmd.append("--newline")
 
     cmd.append(url)
 
     try:
-        print(f'[ytdlp] Starting download (timeout={timeout}s)', file=sys.stderr)
+        print(f"[ytdlp] Starting download (timeout={timeout}s)", file=sys.stderr)
 
         output_lines: list[str] = []
         process = subprocess.Popen(
@@ -165,82 +173,127 @@ def _read_output() -> None:
         except subprocess.TimeoutExpired:
             process.kill()
             reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
+            return False, None, f"Timed out after {timeout} seconds"
 
         reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
+        combined_output = "".join(output_lines)
 
         # Check if any media files were downloaded
         media_extensions = (
-            '.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m4v',
-            '.mp3', '.m4a', '.ogg', '.wav', '.flac', '.aac', '.opus',
-            '.json', '.jpg', '.png', '.webp', '.jpeg',
-            '.vtt', '.srt', '.ass', '.lrc',
-            '.description',
+            ".mp4",
+            ".webm",
+            ".mkv",
+            ".avi",
+            ".mov",
+            ".flv",
+            ".wmv",
+            ".m4v",
+            ".mp3",
+            ".m4a",
+            ".ogg",
+            ".wav",
+            ".flac",
+            ".aac",
+            ".opus",
+            ".json",
+            ".jpg",
+            ".png",
+            ".webp",
+            ".jpeg",
+            ".vtt",
+            ".srt",
+            ".ass",
+            ".lrc",
+            ".description",
         )
 
         downloaded_files = [
-            f for f in output_dir.glob('*')
+            f
+            for f in output_dir.glob("*")
             if f.is_file() and f.suffix.lower() in media_extensions
         ]
 
         if downloaded_files:
             # Return first video/audio file, or first file if no media
             video_audio = [
-                f for f in downloaded_files
-                if f.suffix.lower() in ('.mp4', '.webm', '.mkv', '.avi', '.mov', '.mp3', '.m4a', '.ogg', '.wav', '.flac')
+                f
+                for f in downloaded_files
+                if f.suffix.lower()
+                in (
+                    ".mp4",
+                    ".webm",
+                    ".mkv",
+                    ".avi",
+                    ".mov",
+                    ".mp3",
+                    ".m4a",
+                    ".ogg",
+                    ".wav",
+                    ".flac",
+                )
             ]
             output = str(video_audio[0]) if video_audio else str(downloaded_files[0])
-            return True, output, ''
+            return True, output, ""
         else:
             stderr = combined_output
 
             # These are NOT errors - page simply has no downloadable media
             # Return success with no output (legitimate "nothing to download")
-            if 'ERROR: Unsupported URL' in stderr:
-                return True, None, ''  # Not a media site - success, no output
-            if 'URL could be a direct video link' in stderr:
-                return True, None, ''  # Not a supported media URL - success, no output
+            if "ERROR: Unsupported URL" in stderr:
+                return True, None, ""  # Not a media site - success, no output
+            if "URL could be a direct video link" in stderr:
+                return True, None, ""  # Not a supported media URL - success, no output
             if process.returncode == 0:
-                return True, None, ''  # yt-dlp exited cleanly, just no media - success
+                return True, None, ""  # yt-dlp exited cleanly, just no media - success
 
             # These ARE errors - something went wrong
-            if 'HTTP Error 404' in stderr:
-                return False, None, '404 Not Found'
-            if 'HTTP Error 403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'Unable to extract' in stderr:
-                return False, None, 'Unable to extract media info'
+            if "HTTP Error 404" in stderr:
+                return False, None, "404 Not Found"
+            if "HTTP Error 403" in stderr:
+                return False, None, "403 Forbidden"
+            if "Unable to extract" in stderr:
+                return False, None, "Unable to extract media info"
 
-            return False, None, f'yt-dlp error: {stderr}'
+            return False, None, f"yt-dlp error: {stderr}"
 
     except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
+        return False, None, f"Timed out after {timeout} seconds"
     except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
+        return False, None, f"{type(e).__name__}: {e}"
 
 
 @click.command()
-@click.option('--url', required=True, help='URL to download video/audio from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
+@click.option("--url", required=True, help="URL to download video/audio from")
+@click.option("--snapshot-id", required=True, help="Snapshot UUID")
 def main(url: str, snapshot_id: str):
     """Download video/audio from a URL using yt-dlp."""
 
     try:
         # Check if yt-dlp downloading is enabled
-        if not get_env_bool('YTDLP_ENABLED', True):
-            print('Skipping ytdlp (YTDLP_ENABLED=False)', file=sys.stderr)
+        if not get_env_bool("YTDLP_ENABLED", True):
+            print("Skipping ytdlp (YTDLP_ENABLED=False)", file=sys.stderr)
             # Temporary failure (config disabled) - NO JSONL emission
             sys.exit(0)
 
         # Check if staticfile extractor already handled this (permanent skip)
         if has_staticfile_output():
-            print('Skipping ytdlp - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
+            print(
+                "Skipping ytdlp - staticfile extractor already downloaded this",
+                file=sys.stderr,
+            )
+            print(
+                json.dumps(
+                    {
+                        "type": "ArchiveResult",
+                        "status": "skipped",
+                        "output_str": "staticfile already exists",
+                    }
+                )
+            )
             sys.exit(0)
 
         # Get binary from environment
-        binary = get_env('YTDLP_BINARY', 'yt-dlp')
+        binary = get_env("YTDLP_BINARY", "yt-dlp")
 
         # Run extraction
         success, output, error = save_ytdlp(url, binary)
@@ -248,22 +301,22 @@ def main(url: str, snapshot_id: str):
         if success:
             # Success - emit ArchiveResult
             result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": output or "",
             }
             print(json.dumps(result))
             sys.exit(0)
         else:
             # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
+            print(f"ERROR: {error}", file=sys.stderr)
             sys.exit(1)
 
     except Exception as e:
         # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
+        print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr)
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/abx_plugins/plugins/ytdlp/tests/test_ytdlp.py b/abx_plugins/plugins/ytdlp/tests/test_ytdlp.py
index 561c432..85f20da 100644
--- a/abx_plugins/plugins/ytdlp/tests/test_ytdlp.py
+++ b/abx_plugins/plugins/ytdlp/tests/test_ytdlp.py
@@ -11,88 +11,244 @@
 """
 
 import json
+import io
+import os
 import subprocess
 import sys
 import tempfile
 import time
+import uuid
+import wave
 from pathlib import Path
 import pytest
 
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
-YTDLP_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_ytdlp.*'), None)
-TEST_URL = 'https://example.com/video.mp4'
+_YTDLP_HOOK = next(PLUGIN_DIR.glob("on_Snapshot__*_ytdlp.*"), None)
+if _YTDLP_HOOK is None:
+    raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}")
+YTDLP_HOOK = _YTDLP_HOOK
+TEST_URL = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
+
+# Module-level cache for binary path
+_ytdlp_binary_path = None
+_ytdlp_lib_root = None
+
+
+def _has_ssl_cert_error(result: subprocess.CompletedProcess[str]) -> bool:
+    combined = f"{result.stdout}\n{result.stderr}"
+    return "CERTIFICATE_VERIFY_FAILED" in combined
+
+
+def _build_test_wav_bytes() -> bytes:
+    """Build a short deterministic WAV payload for local-media extractor tests."""
+    sample_rate = 8000
+    duration_seconds = 1
+    num_frames = sample_rate * duration_seconds
+
+    wav_io = io.BytesIO()
+    with wave.open(wav_io, "wb") as wav_file:
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
+        wav_file.setframerate(sample_rate)
+        wav_file.writeframes(b"\x00\x00" * num_frames)
+
+    return wav_io.getvalue()
+
+
+@pytest.fixture
+def non_video_test_url(httpserver):
+    """Serve deterministic non-media content for failure-path ytdlp tests."""
+    httpserver.expect_request("/").respond_with_data(
+        """
+        <!doctype html>
+        <html>
+          <head><title>Not a media URL</title></head>
+          <body><h1>No downloadable media here</h1></body>
+        </html>
+        """.strip(),
+        content_type="text/html; charset=utf-8",
+    )
+    return httpserver.url_for("/")
 
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert YTDLP_HOOK.exists(), f"Hook not found: {YTDLP_HOOK}"
+
+@pytest.fixture
+def media_test_url(httpserver):
+    """Serve deterministic media bytes for end-to-end ytdlp extraction tests."""
+    httpserver.expect_request("/sample.wav").respond_with_data(
+        _build_test_wav_bytes(),
+        content_type="audio/wav",
+    )
+    return httpserver.url_for("/sample.wav")
 
 
-def test_verify_deps_with_abx_pkg():
-    """Verify yt-dlp, node, and ffmpeg are available via abx-pkg."""
-    from abx_pkg import Binary, PipProvider, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+def require_ytdlp_binary() -> str:
+    """Return yt-dlp binary path or fail with actionable context."""
+    binary_path = get_ytdlp_binary_path()
+    assert binary_path, (
+        "yt-dlp installation failed. Install hook should install yt-dlp "
+        "automatically in this test environment."
+    )
+    assert Path(binary_path).is_file(), f"yt-dlp binary path invalid: {binary_path}"
+    return binary_path
+
+
+def get_ytdlp_binary_path():
+    """Get yt-dlp path from cache or by running install hooks."""
+    global _ytdlp_binary_path
+    if _ytdlp_binary_path and Path(_ytdlp_binary_path).is_file():
+        return _ytdlp_binary_path
+
+    from abx_pkg import Binary, PipProvider, EnvProvider
+
+    try:
+        binary = Binary(
+            name="yt-dlp",
+            binproviders=[PipProvider(), EnvProvider()],
+            overrides={"pip": {"packages": ["yt-dlp[default]"]}},
+        ).load()
+        if binary and binary.abspath:
+            _ytdlp_binary_path = str(binary.abspath)
+            return _ytdlp_binary_path
+    except Exception:
+        pass
 
-    missing_binaries = []
+    pip_hook = PLUGINS_ROOT / "pip" / "on_Binary__11_pip_install.py"
+    crawl_hook = PLUGIN_DIR / "on_Crawl__15_ytdlp_install.py"
+    if not pip_hook.exists():
+        return None
 
-    # Verify yt-dlp is available
-    ytdlp_binary = Binary(name='yt-dlp', binproviders=[PipProvider(), EnvProvider()])
-    ytdlp_loaded = ytdlp_binary.load()
-    if not (ytdlp_loaded and ytdlp_loaded.abspath):
-        missing_binaries.append('yt-dlp')
+    binary_id = str(uuid.uuid4())
+    machine_id = str(uuid.uuid4())
+    binproviders = "*"
+    overrides = None
 
-    # Verify node is available (yt-dlp needs it for JS extraction)
-    node_binary = Binary(
-        name='node',
-        binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
+    if crawl_hook.exists():
+        crawl_result = subprocess.run(
+            [sys.executable, str(crawl_hook)],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        for line in crawl_result.stdout.strip().split("\n"):
+            if not line.strip().startswith("{"):
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if record.get("type") == "Binary" and record.get("name") == "yt-dlp":
+                binproviders = record.get("binproviders", "*")
+                overrides = record.get("overrides")
+                break
+
+    global _ytdlp_lib_root
+    if not _ytdlp_lib_root:
+        _ytdlp_lib_root = tempfile.mkdtemp(prefix="ytdlp-lib-")
+
+    env = os.environ.copy()
+    env["HOME"] = str(_ytdlp_lib_root)
+    env["SNAP_DIR"] = str(Path(_ytdlp_lib_root) / "data")
+    env["CRAWL_DIR"] = str(Path(_ytdlp_lib_root) / "crawl")
+    env.pop("LIB_DIR", None)
+
+    cmd = [
+        sys.executable,
+        str(pip_hook),
+        "--binary-id",
+        binary_id,
+        "--machine-id",
+        machine_id,
+        "--name",
+        "yt-dlp",
+        f"--binproviders={binproviders}",
+    ]
+    if overrides:
+        cmd.append(f"--overrides={json.dumps(overrides)}")
+
+    install_result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=300,
+        env=env,
     )
-    node_loaded = node_binary.load()
-    if not (node_loaded and node_loaded.abspath):
-        missing_binaries.append('node')
 
-    # Verify ffmpeg is available (yt-dlp needs it for video conversion)
-    ffmpeg_binary = Binary(name='ffmpeg', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
-    ffmpeg_loaded = ffmpeg_binary.load()
-    if not (ffmpeg_loaded and ffmpeg_loaded.abspath):
-        missing_binaries.append('ffmpeg')
+    for line in install_result.stdout.strip().split("\n"):
+        if not line.strip().startswith("{"):
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if record.get("type") == "Binary" and record.get("name") == "yt-dlp":
+            _ytdlp_binary_path = record.get("abspath")
+            return _ytdlp_binary_path
 
-    if missing_binaries:
-        pass
+    return None
+
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert YTDLP_HOOK.exists(), f"Hook not found: {YTDLP_HOOK}"
 
-def test_handles_non_video_url():
+
+def test_verify_deps_with_abx_pkg():
+    """Verify yt-dlp is installed by real plugin install hooks."""
+    binary_path = require_ytdlp_binary()
+    assert Path(binary_path).is_file(), (
+        f"Binary path must be a valid file: {binary_path}"
+    )
+
+
+def test_handles_non_video_url(non_video_test_url):
     """Test that ytdlp extractor handles non-video URLs gracefully via hook."""
-    # Prerequisites checked by earlier test
+    binary_path = require_ytdlp_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env["YTDLP_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
 
         # Run ytdlp extraction hook on non-video URL
         result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            [
+                sys.executable,
+                str(YTDLP_HOOK),
+                "--url",
+                non_video_test_url,
+                "--snapshot-id",
+                "test789",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
-            timeout=60
+            env=env,
+            timeout=60,
         )
 
         # Should exit 0 even for non-media URL
-        assert result.returncode == 0, f"Should handle non-media URL gracefully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should handle non-media URL gracefully: {result.stderr}"
+        )
 
         # Parse clean JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 pass
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
         assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
 
 def test_config_ytdlp_enabled_false_skips():
@@ -101,102 +257,161 @@ def test_config_ytdlp_enabled_false_skips():
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['YTDLP_ENABLED'] = 'False'
+        env["YTDLP_ENABLED"] = "False"
 
         result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            [
+                sys.executable,
+                str(YTDLP_HOOK),
+                "--url",
+                TEST_URL,
+                "--snapshot-id",
+                "test999",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=30
+            timeout=30,
         )
 
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should exit 0 when feature disabled: {result.stderr}"
+        )
 
         # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
+        assert "Skipping" in result.stderr or "False" in result.stderr, (
+            "Should log skip reason to stderr"
+        )
 
         # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        jsonl_lines = [
+            line
+            for line in result.stdout.strip().split("\n")
+            if line.strip().startswith("{")
+        ]
+        assert len(jsonl_lines) == 0, (
+            f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
+        )
 
 
-def test_config_timeout():
+def test_config_timeout(non_video_test_url):
     """Test that YTDLP_TIMEOUT config is respected (also via MEDIA_TIMEOUT alias)."""
-    import os
+    binary_path = require_ytdlp_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         env = os.environ.copy()
-        env['YTDLP_TIMEOUT'] = '5'
+        env["YTDLP_TIMEOUT"] = "5"
+        env["YTDLP_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
+            [
+                sys.executable,
+                str(YTDLP_HOOK),
+                "--url",
+                non_video_test_url,
+                "--snapshot-id",
+                "testtimeout",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
+            timeout=10,  # Should complete in 5s, use 10s as safety margin
         )
         elapsed_time = time.time() - start_time
 
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should complete without hanging: {result.stderr}"
+        )
         # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        assert elapsed_time <= 6.0, (
+            f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
+        )
 
 
-def test_real_youtube_url():
-    """Test that yt-dlp can extract video/audio from a real YouTube URL."""
-    import os
+def test_extracts_local_media_url(media_test_url):
+    """Test yt-dlp extraction against deterministic local media served by httpserver."""
+    binary_path = require_ytdlp_binary()
 
     with tempfile.TemporaryDirectory() as tmpdir:
         tmpdir = Path(tmpdir)
 
-        # Use a short, stable YouTube video (YouTube's own about video)
-        youtube_url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw'  # "Me at the zoo" - first YouTube video
-
         env = os.environ.copy()
-        env['YTDLP_TIMEOUT'] = '120'  # Give it time to download
+        env["YTDLP_TIMEOUT"] = "60"
+        env["YTDLP_BINARY"] = binary_path
+        env["SNAP_DIR"] = str(tmpdir)
 
         start_time = time.time()
         result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', youtube_url, '--snapshot-id', 'testyoutube'],
+            [
+                sys.executable,
+                str(YTDLP_HOOK),
+                "--url",
+                media_test_url,
+                "--snapshot-id",
+                "testlocalmedia",
+            ],
             cwd=tmpdir,
             capture_output=True,
             text=True,
             env=env,
-            timeout=180
+            timeout=90,
         )
         elapsed_time = time.time() - start_time
 
-        # Should succeed
-        assert result.returncode == 0, f"Should extract video/audio successfully: {result.stderr}"
+        assert result.returncode == 0, (
+            f"Should extract local media successfully: {result.stderr}"
+        )
 
         # Parse JSONL output
         result_json = None
-        for line in result.stdout.strip().split('\n'):
+        for line in result.stdout.strip().split("\n"):
             line = line.strip()
-            if line.startswith('{'):
+            if line.startswith("{"):
                 try:
                     record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
+                    if record.get("type") == "ArchiveResult":
                         result_json = record
                         break
                 except json.JSONDecodeError:
                     pass
 
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
+        assert result_json, (
+            f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
+        )
+        assert result_json["status"] == "succeeded", f"Should succeed: {result_json}"
 
         # Check that some video/audio files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
-        media_files = [f for f in output_files if f.is_file() and f.suffix.lower() in ('.mp4', '.webm', '.mkv', '.m4a', '.mp3', '.json', '.jpg', '.webp')]
-
-        assert len(media_files) > 0, f"Should have downloaded at least one video/audio file. Files: {output_files}"
+        output_files = list(tmpdir.glob("**/*"))
+        media_files = [
+            f
+            for f in output_files
+            if f.is_file()
+            and f.suffix.lower()
+            in (
+                ".mp4",
+                ".webm",
+                ".mkv",
+                ".m4a",
+                ".mp3",
+                ".wav",
+                ".json",
+                ".jpg",
+                ".webp",
+            )
+        ]
+
+        assert len(media_files) > 0, (
+            f"Should have downloaded at least one video/audio file. Files: {output_files}"
+        )
 
-        print(f"Successfully extracted {len(media_files)} file(s) in {elapsed_time:.2f}s")
+        print(
+            f"Successfully extracted {len(media_files)} file(s) in {elapsed_time:.2f}s"
+        )
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/conftest.py b/conftest.py
index 74e4eea..714a325 100644
--- a/conftest.py
+++ b/conftest.py
@@ -9,7 +9,9 @@
 
 
 @pytest.fixture(autouse=True)
-def isolated_test_env(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> dict[str, Path]:
+def isolated_test_env(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> dict[str, Path]:
     """Apply per-test env overrides and let monkeypatch restore global state after each test."""
     test_root = tmp_path / "abx_plugins_env"
     home_dir = test_root / "home"
@@ -30,6 +32,8 @@ def isolated_test_env(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> dict[s
         monkeypatch.setenv("LIB_DIR", str(lib_dir))
     if "PERSONAS_DIR" not in os.environ:
         monkeypatch.setenv("PERSONAS_DIR", str(personas_dir))
+    if "TWOCAPTCHA_API_KEY" not in os.environ and "API_KEY_2CAPTCHA" not in os.environ:
+        print("WARNING: TWOCAPTCHA_API_KEY not found in env, 2captcha tests will fail")
 
     return {
         "root": test_root,
@@ -47,7 +51,7 @@ def local_http_base_url(httpserver) -> str:
     return httpserver.url_for("/")
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="session")
 def ensure_chrome_test_prereqs(ensure_chromium_and_puppeteer_installed):
-    """Install shared Chromium/Puppeteer deps once so hook-only tests can run in isolation."""
+    """Install shared Chromium/Puppeteer deps when explicitly requested by tests."""
     return ensure_chromium_and_puppeteer_installed
diff --git a/pyproject.toml b/pyproject.toml
index cb53a4a..429800a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,9 @@
 [project]
 name = "abx-plugins"
-version = "0.9.0"
+version = "0.9.1"
 description = "ArchiveBox-compatible plugin suite (hooks, configs, binaries manifests)"
 authors = [{name = "Nick Sweeting", email = "pyproject.toml+abx-plugins@archivebox.io"}]
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 license = {text = "MIT"}
 readme = "README.md"
 keywords = ["archivebox", "plugins", "web-archiving", "hooks", "scraping"]
@@ -19,8 +19,15 @@ classifiers = [
     "Environment :: Console",
 ]
 dependencies = [
-    "abx-pkg>=0.6.0",
+    "abx-pkg>=0.6.3",
+    "feedparser>=6.0.0",
+    "pyright>=1.1.408",
+    "pytest>=9.0.2",
+    "pytest-httpserver>=1.1.0",
+    "requests>=2.32.5",
     "rich-click>=1.9.7",
+    "ruff>=0.15.2",
+    "ty>=0.0.18",
 ]
 
 [project.optional-dependencies]