diff --git a/abx_plugins/plugins/defuddle/__init__.py b/abx_plugins/plugins/defuddle/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/abx_plugins/plugins/defuddle/config.json b/abx_plugins/plugins/defuddle/config.json new file mode 100644 index 0000000..aeb25ec --- /dev/null +++ b/abx_plugins/plugins/defuddle/config.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "DEFUDDLE_ENABLED": { + "type": "boolean", + "default": true, + "x-aliases": ["SAVE_DEFUDDLE", "USE_DEFUDDLE"], + "description": "Enable Defuddle text extraction" + }, + "DEFUDDLE_BINARY": { + "type": "string", + "default": "defuddle", + "description": "Path to defuddle binary" + }, + "DEFUDDLE_TIMEOUT": { + "type": "integer", + "default": 30, + "minimum": 5, + "x-fallback": "TIMEOUT", + "description": "Timeout for Defuddle in seconds" + }, + "DEFUDDLE_ARGS": { + "type": "array", + "items": {"type": "string"}, + "default": [], + "x-aliases": ["DEFUDDLE_DEFAULT_ARGS"], + "description": "Default Defuddle arguments" + }, + "DEFUDDLE_ARGS_EXTRA": { + "type": "array", + "items": {"type": "string"}, + "default": [], + "x-aliases": ["DEFUDDLE_EXTRA_ARGS"], + "description": "Extra arguments to append to Defuddle command" + } + } +} diff --git a/abx_plugins/plugins/defuddle/on_Crawl__41_defuddle_install.py b/abx_plugins/plugins/defuddle/on_Crawl__41_defuddle_install.py new file mode 100644 index 0000000..78eb78a --- /dev/null +++ b/abx_plugins/plugins/defuddle/on_Crawl__41_defuddle_install.py @@ -0,0 +1,60 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.12" +# /// +""" +Emit defuddle Binary dependency for the crawl. 
+""" + +import json +import os +import sys +from pathlib import Path + +PLUGIN_DIR = Path(__file__).parent.name +CRAWL_DIR = Path(os.environ.get("CRAWL_DIR", ".")).resolve() +OUTPUT_DIR = CRAWL_DIR / PLUGIN_DIR +OUTPUT_DIR.mkdir(parents=True, exist_ok=True) +os.chdir(OUTPUT_DIR) + + +def get_env(name: str, default: str = "") -> str: + return os.environ.get(name, default).strip() + + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, "").lower() + if val in ("true", "1", "yes", "on"): + return True + if val in ("false", "0", "no", "off"): + return False + return default + + +def output_binary(name: str, binproviders: str): + machine_id = os.environ.get("MACHINE_ID", "") + + record = { + "type": "Binary", + "name": name, + "binproviders": binproviders, + "overrides": { + "npm": { + "packages": ["defuddle"], + }, + }, + "machine_id": machine_id, + } + print(json.dumps(record)) + + +def main(): + if not get_env_bool("DEFUDDLE_ENABLED", True): + sys.exit(0) + + output_binary(name="defuddle", binproviders="npm,env") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/abx_plugins/plugins/defuddle/on_Snapshot__57_defuddle.py b/abx_plugins/plugins/defuddle/on_Snapshot__57_defuddle.py new file mode 100644 index 0000000..0b36142 --- /dev/null +++ b/abx_plugins/plugins/defuddle/on_Snapshot__57_defuddle.py @@ -0,0 +1,201 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "click", +# ] +# /// +# +# Extract article content using Defuddle. 
+ +import argparse +import html +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +PLUGIN_DIR = Path(__file__).resolve().parent.name +SNAP_DIR = Path(os.environ.get("SNAP_DIR", ".")).resolve() +OUTPUT_DIR = SNAP_DIR / PLUGIN_DIR +OUTPUT_DIR.mkdir(parents=True, exist_ok=True) +os.chdir(OUTPUT_DIR) + + +def get_env(name: str, default: str = "") -> str: + return os.environ.get(name, default).strip() + + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, "").lower() + if val in ("true", "1", "yes", "on"): + return True + if val in ("false", "0", "no", "off"): + return False + return default + + +def get_env_int(name: str, default: int = 0) -> int: + try: + return int(get_env(name, str(default))) + except ValueError: + return default + + +def get_env_array(name: str, default: list[str] | None = None) -> list[str]: + val = get_env(name, "") + if not val: + return default if default is not None else [] + try: + result = json.loads(val) + if isinstance(result, list): + return [str(item) for item in result] + return default if default is not None else [] + except json.JSONDecodeError: + return default if default is not None else [] + + +def find_html_source() -> str | None: + """Return first non-empty HTML source file from sibling extractor outputs.""" + search_patterns = [ + "singlefile/singlefile.html", + "*_singlefile/singlefile.html", + "singlefile/*.html", + "*_singlefile/*.html", + "dom/output.html", + "*_dom/output.html", + "dom/*.html", + "*_dom/*.html", + "wget/**/*.html", + "*_wget/**/*.html", + "wget/**/*.htm", + "*_wget/**/*.htm", + ] + + for base in (Path.cwd(), Path.cwd().parent): + for pattern in search_patterns: + for match in base.glob(pattern): + if match.is_file() and match.stat().st_size > 0: + return str(match) + return None + + +def extract_defuddle(url: str, binary: str) -> tuple[bool, str | None, str]: + timeout = get_env_int("DEFUDDLE_TIMEOUT") or get_env_int("TIMEOUT", 60) 
+ defuddle_args = get_env_array("DEFUDDLE_ARGS", []) + defuddle_args_extra = get_env_array("DEFUDDLE_ARGS_EXTRA", []) + output_dir = Path(OUTPUT_DIR) + html_source = find_html_source() + if not html_source: + return False, None, "No HTML source found (run singlefile, dom, or wget first)" + + try: + cmd = [ + binary, + *defuddle_args, + "parse", + html_source, + *defuddle_args_extra, + ] + if "--json" not in cmd and "-j" not in cmd: + cmd.append("--json") + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=timeout, + text=True, + ) + + if result.returncode != 0: + err = (result.stderr or "").strip() + if err: + return False, None, f"defuddle failed (exit={result.returncode}): {err}" + return False, None, f"defuddle failed (exit={result.returncode})" + + raw_output = result.stdout.strip() + html_content = "" + text_content = "" + metadata: dict[str, object] = {} + + try: + parsed = json.loads(raw_output) + except json.JSONDecodeError: + parsed = None + + if isinstance(parsed, dict): + html_content = str(parsed.get("content") or parsed.get("html") or "") + text_content = str( + parsed.get("textContent") + or parsed.get("text") + or parsed.get("markdown") + or "" + ) + metadata = { + key: value + for key, value in parsed.items() + if key not in {"content", "html", "textContent", "text", "markdown"} + } + elif raw_output: + text_content = raw_output + + if text_content and not html_content: + html_content = f"
<pre>{html.escape(text_content)}</pre>
" + + if not text_content and html_content: + text_content = re.sub(r"<[^>]+>", " ", html_content) + text_content = " ".join(text_content.split()) + + if not text_content and not html_content: + return False, None, "No content extracted" + + (output_dir / "content.html").write_text(html_content, encoding="utf-8") + (output_dir / "content.txt").write_text(text_content, encoding="utf-8") + (output_dir / "article.json").write_text( + json.dumps(metadata, indent=2), encoding="utf-8" + ) + + return True, "content.html", "" + except subprocess.TimeoutExpired: + return False, None, f"Timed out after {timeout} seconds" + except Exception as e: + return False, None, f"{type(e).__name__}: {e}" + + +def main(): + try: + parser = argparse.ArgumentParser() + parser.add_argument("--url", required=True, help="URL to extract article from") + parser.add_argument("--snapshot-id", required=True, help="Snapshot UUID") + args = parser.parse_args() + + if not get_env_bool("DEFUDDLE_ENABLED", True): + print("Skipping defuddle (DEFUDDLE_ENABLED=False)", file=sys.stderr) + sys.exit(0) + + binary = get_env("DEFUDDLE_BINARY", "defuddle") + success, output, error = extract_defuddle(args.url, binary) + + if success: + print( + json.dumps( + { + "type": "ArchiveResult", + "status": "succeeded", + "output_str": output or "", + } + ) + ) + sys.exit(0) + + print(f"ERROR: {error}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"ERROR: {type(e).__name__}: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/abx_plugins/plugins/defuddle/tests/test_defuddle.py b/abx_plugins/plugins/defuddle/tests/test_defuddle.py new file mode 100644 index 0000000..9f73136 --- /dev/null +++ b/abx_plugins/plugins/defuddle/tests/test_defuddle.py @@ -0,0 +1,277 @@ +import json +import os +import subprocess +import sys +import tempfile +import uuid +from pathlib import Path +from urllib.request import urlopen + +import pytest + +from 
abx_plugins.plugins.chrome.tests.chrome_test_helpers import ( + get_hook_script, + get_plugin_dir, +) + + +PLUGIN_DIR = get_plugin_dir(__file__) +PLUGINS_ROOT = PLUGIN_DIR.parent +_DEFUDDLE_HOOK = get_hook_script(PLUGIN_DIR, "on_Snapshot__*_defuddle.*") +if _DEFUDDLE_HOOK is None: + raise FileNotFoundError(f"Hook not found in {PLUGIN_DIR}") +DEFUDDLE_HOOK = _DEFUDDLE_HOOK + +_DEFUDDLE_CRAWL_HOOK = get_hook_script(PLUGIN_DIR, "on_Crawl__*_defuddle_install.*") +if _DEFUDDLE_CRAWL_HOOK is None: + raise FileNotFoundError(f"Crawl hook not found in {PLUGIN_DIR}") +DEFUDDLE_CRAWL_HOOK = _DEFUDDLE_CRAWL_HOOK + + +TEST_URL = "https://example.com" +_defuddle_binary_path = None +_defuddle_lib_root = None + + +def create_example_html(tmpdir: Path) -> Path: + """Create a local singlefile HTML fixture used as parser input.""" + singlefile_dir = tmpdir / "singlefile" + singlefile_dir.mkdir(parents=True, exist_ok=True) + html_file = singlefile_dir / "singlefile.html" + html_file.write_text( + "Example Domain

</title></head><body>
<h1>Example Domain</h1>
<p>Example text body</p>
</body></html>
", + encoding="utf-8", + ) + return html_file + + +def require_defuddle_binary() -> str: + """Return defuddle binary path or fail with actionable context.""" + binary_path = get_defuddle_binary_path() + assert binary_path, ( + "defuddle installation failed. Install hook should install " + "the binary automatically in this test environment." + ) + assert Path(binary_path).is_file(), f"defuddle binary path invalid: {binary_path}" + return binary_path + + +def get_defuddle_binary_path() -> str | None: + """Get defuddle path from cache or by running install hooks.""" + global _defuddle_binary_path + if _defuddle_binary_path and Path(_defuddle_binary_path).is_file(): + return _defuddle_binary_path + + from abx_pkg import Binary, EnvProvider, NpmProvider + + try: + binary = Binary( + name="defuddle", + binproviders=[NpmProvider(), EnvProvider()], + overrides={"npm": {"packages": ["defuddle"]}}, + ).load() + if binary and binary.abspath: + _defuddle_binary_path = str(binary.abspath) + return _defuddle_binary_path + except Exception: + pass + + npm_hook = PLUGINS_ROOT / "npm" / "on_Binary__10_npm_install.py" + if not npm_hook.exists(): + return None + + binary_id = str(uuid.uuid4()) + machine_id = str(uuid.uuid4()) + binproviders = "*" + overrides = None + + crawl_result = subprocess.run( + [sys.executable, str(DEFUDDLE_CRAWL_HOOK)], + capture_output=True, + text=True, + timeout=30, + ) + for line in crawl_result.stdout.strip().split("\n"): + if not line.strip().startswith("{"): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get("type") == "Binary" and record.get("name") == "defuddle": + binproviders = record.get("binproviders", "*") + overrides = record.get("overrides") + break + + global _defuddle_lib_root + if not _defuddle_lib_root: + _defuddle_lib_root = tempfile.mkdtemp(prefix="defuddle-lib-") + + env = os.environ.copy() + env["LIB_DIR"] = str(Path(_defuddle_lib_root) / ".config" / "abx" / "lib") + env["SNAP_DIR"] 
= str(Path(_defuddle_lib_root) / "data") + env["CRAWL_DIR"] = str(Path(_defuddle_lib_root) / "crawl") + + cmd = [ + "uv", + "run", + str(npm_hook), + "--binary-id", + binary_id, + "--machine-id", + machine_id, + "--name", + "defuddle", + f"--binproviders={binproviders}", + ] + if overrides: + cmd.append(f"--overrides={json.dumps(overrides)}") + + install_result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=300, + env=env, + ) + + for line in install_result.stdout.strip().split("\n"): + if not line.strip().startswith("{"): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get("type") == "Binary" and record.get("name") == "defuddle": + _defuddle_binary_path = record.get("abspath") + return _defuddle_binary_path + + return None + + +def test_hook_script_exists(): + assert DEFUDDLE_HOOK.exists(), f"Hook script not found: {DEFUDDLE_HOOK}" + + +def test_crawl_hook_emits_defuddle_binary_record(): + result = subprocess.run( + [sys.executable, str(DEFUDDLE_CRAWL_HOOK)], + capture_output=True, + text=True, + timeout=30, + ) + + assert result.returncode == 0 + records = [ + json.loads(line) + for line in result.stdout.splitlines() + if line.strip().startswith("{") + ] + assert records, "Expected crawl hook to emit Binary record" + binary = records[0] + assert binary.get("type") == "Binary" + assert binary.get("name") == "defuddle" + assert binary.get("overrides", {}).get("npm", {}).get("packages") == ["defuddle"] + + +def test_reports_missing_dependency_when_not_installed(): + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + snap_dir = tmpdir / "snap" + snap_dir.mkdir(parents=True, exist_ok=True) + create_example_html(snap_dir) + + env = {"PATH": "/nonexistent", "HOME": str(tmpdir), "SNAP_DIR": str(snap_dir)} + result = subprocess.run( + [ + sys.executable, + str(DEFUDDLE_HOOK), + "--url", + TEST_URL, + "--snapshot-id", + "test123", + ], + cwd=tmpdir, + capture_output=True, + 
text=True, + env=env, + ) + + assert result.returncode == 1 + jsonl_lines = [ + line for line in result.stdout.strip().split("\n") if line.strip().startswith("{") + ] + assert len(jsonl_lines) == 0 + assert "defuddle" in result.stderr.lower() or "error" in result.stderr.lower() + + +def test_verify_deps_with_abx_pkg(): + binary_path = require_defuddle_binary() + assert Path(binary_path).is_file() + + +def test_extracts_article_with_real_binary(httpserver): + binary_path = require_defuddle_binary() + test_url = httpserver.url_for("/defuddle-article") + + httpserver.expect_request("/defuddle-article").respond_with_data( + "Defuddle Test Article" + "

<body><h1>Defuddle Test Article</h1>
" "<p>This is test content for defuddle parser integration.</p>
" "</body></html>
", + content_type="text/html; charset=utf-8", + ) + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + snap_dir = tmpdir / "snap" + snap_dir.mkdir(parents=True, exist_ok=True) + singlefile_dir = snap_dir / "singlefile" + singlefile_dir.mkdir(parents=True, exist_ok=True) + html_source = singlefile_dir / "singlefile.html" + with urlopen(test_url, timeout=10) as response: + page_html = response.read().decode("utf-8") + html_source.write_text( + page_html, + encoding="utf-8", + ) + + env = os.environ.copy() + env["SNAP_DIR"] = str(snap_dir) + env["DEFUDDLE_BINARY"] = binary_path + + result = subprocess.run( + [ + sys.executable, + str(DEFUDDLE_HOOK), + "--url", + TEST_URL, + "--snapshot-id", + "test456", + ], + cwd=tmpdir, + capture_output=True, + text=True, + timeout=30, + env=env, + ) + + assert result.returncode == 0, result.stderr + + output_dir = snap_dir / "defuddle" + assert (output_dir / "content.html").exists() + assert (output_dir / "content.txt").exists() + assert (output_dir / "article.json").exists() + + assert "defuddle parser integration" in ( + output_dir / "content.html" + ).read_text(encoding="utf-8").lower() + assert "defuddle parser integration" in ( + output_dir / "content.txt" + ).read_text(encoding="utf-8").lower() + metadata = json.loads((output_dir / "article.json").read_text(encoding="utf-8")) + assert metadata.get("title") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])