From 70da78219f21c23f322503b292e27b4561d2f657 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:04:55 +0000
Subject: [PATCH 1/6] Update vulnerable transitive dependencies in poetry.lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bump certifi (2023.5.7 → 2026.2.25), idna (3.4 → 3.11), and
urllib3 (2.0.2 → 2.6.3) to resolve known CVEs including
CVE-2023-37920, CVE-2024-39689, CVE-2024-3651, CVE-2023-43804,
CVE-2023-45803, and CVE-2024-37891.

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 poetry.lock | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 60db29b..74e6b5a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.3.3 and should not be changed by hand.
 
 [[package]]
 name = "beautifulsoup4"
@@ -25,14 +25,14 @@ lxml = ["lxml"]
 
 [[package]]
 name = "certifi"
-version = "2023.5.7"
+version = "2026.2.25"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 groups = ["main", "dev"]
 files = [
-    {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"},
-    {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"},
+    {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"},
+    {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"},
 ]
 
 [[package]]
@@ -256,16 +256,19 @@ test = ["pytest (>=6)"]
 
 [[package]]
 name = "idna"
-version = "3.4"
+version = "3.11"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.8"
 groups = ["main", "dev"]
 files = [
-    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
-    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+    {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"},
+    {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"},
 ]
 
+[package.extras]
+all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+
 [[package]]
 name = "iniconfig"
 version = "2.0.0"
@@ -549,21 +552,21 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.0.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
 groups = ["main", "dev"]
 files = [
-    {file = "urllib3-2.0.2-py3-none-any.whl", hash = "sha256:d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e"},
-    {file = "urllib3-2.0.2.tar.gz", hash = "sha256:61717a1095d7e155cdb737ac7bb2f4324a858a1e2e6466f6d03ff630ca68d3cc"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
-brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
-secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
+brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""]
+h2 = ["h2 (>=4,<5)"]
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
-zstd = ["zstandard (>=0.18.0)"]
+zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
 
 [metadata]
 lock-version = "2.1"

From 27d3d58d6dc5885aae92dabd6fd1e95c4f3736de Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:05:42 +0000
Subject: [PATCH 2/6] Update all remaining dependencies to latest compatible
 versions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bump charset-normalizer (3.1.0 → 3.4.7), requests (2.32.5 → 2.33.1),
soupsieve (2.4.1 → 2.8.3), pymupdf (1.26.7 → 1.27.2.2),
packaging (23.1 → 26.0), and others to resolve remaining
dependency alerts.

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 poetry.lock | 544 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 331 insertions(+), 213 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 74e6b5a..8206501 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -37,87 +37,141 @@ files = [
 
 [[package]]
 name = "charset-normalizer"
-version = "3.1.0"
+version = "3.4.7"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
-python-versions = ">=3.7.0"
+python-versions = ">=3.7"
 groups = ["main", "dev"]
 files = [
-    {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"},
-    {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"},
-    {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"},
-    {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"},
-    {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"},
-    {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"},
-    {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-win32.whl", hash = "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943"},
+    {file = "charset_normalizer-3.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00"},
+    {file = "charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6"},
+    {file = "charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110"},
+    {file = "charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c"},
+    {file = "charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_armv7l.whl", hash = "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-win32.whl", hash = "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207"},
+    {file = "charset_normalizer-3.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-win32.whl", hash = "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444"},
+    {file = "charset_normalizer-3.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c"},
+    {file = "charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d"},
+    {file = "charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5"},
 ]
 
 [[package]]
@@ -135,104 +189,118 @@ markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win
 
 [[package]]
 name = "coverage"
-version = "7.13.0"
+version = "7.13.5"
 description = "Code coverage measurement for Python"
 optional = false
 python-versions = ">=3.10"
 groups = ["dev"]
 files = [
-    {file = "coverage-7.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02d9fb9eccd48f6843c98a37bd6817462f130b86da8660461e8f5e54d4c06070"},
-    {file = "coverage-7.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:367449cf07d33dc216c083f2036bb7d976c6e4903ab31be400ad74ad9f85ce98"},
-    {file = "coverage-7.13.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cdb3c9f8fef0a954c632f64328a3935988d33a6604ce4bf67ec3e39670f12ae5"},
-    {file = "coverage-7.13.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d10fd186aac2316f9bbb46ef91977f9d394ded67050ad6d84d94ed6ea2e8e54e"},
-    {file = "coverage-7.13.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f88ae3e69df2ab62fb0bc5219a597cb890ba5c438190ffa87490b315190bb33"},
-    {file = "coverage-7.13.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4be718e51e86f553bcf515305a158a1cd180d23b72f07ae76d6017c3cc5d791"},
-    {file = "coverage-7.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a00d3a393207ae12f7c49bb1c113190883b500f48979abb118d8b72b8c95c032"},
-    {file = "coverage-7.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a7b1cd820e1b6116f92c6128f1188e7afe421c7e1b35fa9836b11444e53ebd9"},
-    {file = "coverage-7.13.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:37eee4e552a65866f15dedd917d5e5f3d59805994260720821e2c1b51ac3248f"},
-    {file = "coverage-7.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:62d7c4f13102148c78d7353c6052af6d899a7f6df66a32bddcc0c0eb7c5326f8"},
-    {file = "coverage-7.13.0-cp310-cp310-win32.whl", hash = "sha256:24e4e56304fdb56f96f80eabf840eab043b3afea9348b88be680ec5986780a0f"},
-    {file = "coverage-7.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:74c136e4093627cf04b26a35dab8cbfc9b37c647f0502fc313376e11726ba303"},
-    {file = "coverage-7.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0dfa3855031070058add1a59fdfda0192fd3e8f97e7c81de0596c145dea51820"},
-    {file = "coverage-7.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fdb6f54f38e334db97f72fa0c701e66d8479af0bc3f9bfb5b90f1c30f54500f"},
-    {file = "coverage-7.13.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7e442c013447d1d8d195be62852270b78b6e255b79b8675bad8479641e21fd96"},
-    {file = "coverage-7.13.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ed5630d946859de835a85e9a43b721123a8a44ec26e2830b296d478c7fd4259"},
-    {file = "coverage-7.13.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f15a931a668e58087bc39d05d2b4bf4b14ff2875b49c994bbdb1c2217a8daeb"},
-    {file = "coverage-7.13.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30a3a201a127ea57f7e14ba43c93c9c4be8b7d17a26e03bb49e6966d019eede9"},
-    {file = "coverage-7.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a485ff48fbd231efa32d58f479befce52dcb6bfb2a88bb7bf9a0b89b1bc8030"},
-    {file = "coverage-7.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:22486cdafba4f9e471c816a2a5745337742a617fef68e890d8baf9f3036d7833"},
-    {file = "coverage-7.13.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:263c3dbccc78e2e331e59e90115941b5f53e85cfcc6b3b2fbff1fd4e3d2c6ea8"},
-    {file = "coverage-7.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5330fa0cc1f5c3c4c3bb8e101b742025933e7848989370a1d4c8c5e401ea753"},
-    {file = "coverage-7.13.0-cp311-cp311-win32.whl", hash = "sha256:0f4872f5d6c54419c94c25dd6ae1d015deeb337d06e448cd890a1e89a8ee7f3b"},
-    {file = "coverage-7.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51a202e0f80f241ccb68e3e26e19ab5b3bf0f813314f2c967642f13ebcf1ddfe"},
-    {file = "coverage-7.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:d2a9d7f1c11487b1c69367ab3ac2d81b9b3721f097aa409a3191c3e90f8f3dd7"},
-    {file = "coverage-7.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0b3d67d31383c4c68e19a88e28fc4c2e29517580f1b0ebec4a069d502ce1e0bf"},
-    {file = "coverage-7.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:581f086833d24a22c89ae0fe2142cfaa1c92c930adf637ddf122d55083fb5a0f"},
-    {file = "coverage-7.13.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0a3a30f0e257df382f5f9534d4ce3d4cf06eafaf5192beb1a7bd066cb10e78fb"},
-    {file = "coverage-7.13.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:583221913fbc8f53b88c42e8dbb8fca1d0f2e597cb190ce45916662b8b9d9621"},
-    {file = "coverage-7.13.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f5d9bd30756fff3e7216491a0d6d520c448d5124d3d8e8f56446d6412499e74"},
-    {file = "coverage-7.13.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a23e5a1f8b982d56fa64f8e442e037f6ce29322f1f9e6c2344cd9e9f4407ee57"},
-    {file = "coverage-7.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b01c22bc74a7fb44066aaf765224c0d933ddf1f5047d6cdfe4795504a4493f8"},
-    {file = "coverage-7.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:898cce66d0836973f48dda4e3514d863d70142bdf6dfab932b9b6a90ea5b222d"},
-    {file = "coverage-7.13.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:3ab483ea0e251b5790c2aac03acde31bff0c736bf8a86829b89382b407cd1c3b"},
-    {file = "coverage-7.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1d84e91521c5e4cb6602fe11ece3e1de03b2760e14ae4fcf1a4b56fa3c801fcd"},
-    {file = "coverage-7.13.0-cp312-cp312-win32.whl", hash = "sha256:193c3887285eec1dbdb3f2bd7fbc351d570ca9c02ca756c3afbc71b3c98af6ef"},
-    {file = "coverage-7.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:4f3e223b2b2db5e0db0c2b97286aba0036ca000f06aca9b12112eaa9af3d92ae"},
-    {file = "coverage-7.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:086cede306d96202e15a4b77ace8472e39d9f4e5f9fd92dd4fecdfb2313b2080"},
-    {file = "coverage-7.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:28ee1c96109974af104028a8ef57cec21447d42d0e937c0275329272e370ebcf"},
-    {file = "coverage-7.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e97353dcc5587b85986cda4ff3ec98081d7e84dd95e8b2a6d59820f0545f8a"},
-    {file = "coverage-7.13.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:99acd4dfdfeb58e1937629eb1ab6ab0899b131f183ee5f23e0b5da5cba2fec74"},
-    {file = "coverage-7.13.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ff45e0cd8451e293b63ced93161e189780baf444119391b3e7d25315060368a6"},
-    {file = "coverage-7.13.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f4f72a85316d8e13234cafe0a9f81b40418ad7a082792fa4165bd7d45d96066b"},
-    {file = "coverage-7.13.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:11c21557d0e0a5a38632cbbaca5f008723b26a89d70db6315523df6df77d6232"},
-    {file = "coverage-7.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76541dc8d53715fb4f7a3a06b34b0dc6846e3c69bc6204c55653a85dd6220971"},
-    {file = "coverage-7.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6e9e451dee940a86789134b6b0ffbe31c454ade3b849bb8a9d2cca2541a8e91d"},
-    {file = "coverage-7.13.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:5c67dace46f361125e6b9cace8fe0b729ed8479f47e70c89b838d319375c8137"},
-    {file = "coverage-7.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f59883c643cb19630500f57016f76cfdcd6845ca8c5b5ea1f6e17f74c8e5f511"},
-    {file = "coverage-7.13.0-cp313-cp313-win32.whl", hash = "sha256:58632b187be6f0be500f553be41e277712baa278147ecb7559983c6d9faf7ae1"},
-    {file = "coverage-7.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:73419b89f812f498aca53f757dd834919b48ce4799f9d5cad33ca0ae442bdb1a"},
-    {file = "coverage-7.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:eb76670874fdd6091eedcc856128ee48c41a9bbbb9c3f1c7c3cf169290e3ffd6"},
-    {file = "coverage-7.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6e63ccc6e0ad8986386461c3c4b737540f20426e7ec932f42e030320896c311a"},
-    {file = "coverage-7.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:494f5459ffa1bd45e18558cd98710c36c0b8fbfa82a5eabcbe671d80ecffbfe8"},
-    {file = "coverage-7.13.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:06cac81bf10f74034e055e903f5f946e3e26fc51c09fc9f584e4a1605d977053"},
-    {file = "coverage-7.13.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f2ffc92b46ed6e6760f1d47a71e56b5664781bc68986dbd1836b2b70c0ce2071"},
-    {file = "coverage-7.13.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0602f701057c6823e5db1b74530ce85f17c3c5be5c85fc042ac939cbd909426e"},
-    {file = "coverage-7.13.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:25dc33618d45456ccb1d37bce44bc78cf269909aa14c4db2e03d63146a8a1493"},
-    {file = "coverage-7.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:71936a8b3b977ddd0b694c28c6a34f4fff2e9dd201969a4ff5d5fc7742d614b0"},
-    {file = "coverage-7.13.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:936bc20503ce24770c71938d1369461f0c5320830800933bc3956e2a4ded930e"},
-    {file = "coverage-7.13.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:af0a583efaacc52ae2521f8d7910aff65cdb093091d76291ac5820d5e947fc1c"},
-    {file = "coverage-7.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f1c23e24a7000da892a312fb17e33c5f94f8b001de44b7cf8ba2e36fbd15859e"},
-    {file = "coverage-7.13.0-cp313-cp313t-win32.whl", hash = "sha256:5f8a0297355e652001015e93be345ee54393e45dc3050af4a0475c5a2b767d46"},
-    {file = "coverage-7.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6abb3a4c52f05e08460bd9acf04fec027f8718ecaa0d09c40ffbc3fbd70ecc39"},
-    {file = "coverage-7.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:3ad968d1e3aa6ce5be295ab5fe3ae1bf5bb4769d0f98a80a0252d543a2ef2e9e"},
-    {file = "coverage-7.13.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:453b7ec753cf5e4356e14fe858064e5520c460d3bbbcb9c35e55c0d21155c256"},
-    {file = "coverage-7.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:af827b7cbb303e1befa6c4f94fd2bf72f108089cfa0f8abab8f4ca553cf5ca5a"},
-    {file = "coverage-7.13.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9987a9e4f8197a1000280f7cc089e3ea2c8b3c0a64d750537809879a7b4ceaf9"},
-    {file = "coverage-7.13.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3188936845cd0cb114fa6a51842a304cdbac2958145d03be2377ec41eb285d19"},
-    {file = "coverage-7.13.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2bdb3babb74079f021696cb46b8bb5f5661165c385d3a238712b031a12355be"},
-    {file = "coverage-7.13.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7464663eaca6adba4175f6c19354feea61ebbdd735563a03d1e472c7072d27bb"},
-    {file = "coverage-7.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8069e831f205d2ff1f3d355e82f511eb7c5522d7d413f5db5756b772ec8697f8"},
-    {file = "coverage-7.13.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6fb2d5d272341565f08e962cce14cdf843a08ac43bd621783527adb06b089c4b"},
-    {file = "coverage-7.13.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:5e70f92ef89bac1ac8a99b3324923b4749f008fdbd7aa9cb35e01d7a284a04f9"},
-    {file = "coverage-7.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4b5de7d4583e60d5fd246dd57fcd3a8aa23c6e118a8c72b38adf666ba8e7e927"},
-    {file = "coverage-7.13.0-cp314-cp314-win32.whl", hash = "sha256:a6c6e16b663be828a8f0b6c5027d36471d4a9f90d28444aa4ced4d48d7d6ae8f"},
-    {file = "coverage-7.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:0900872f2fdb3ee5646b557918d02279dc3af3dfb39029ac4e945458b13f73bc"},
-    {file = "coverage-7.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:3a10260e6a152e5f03f26db4a407c4c62d3830b9af9b7c0450b183615f05d43b"},
-    {file = "coverage-7.13.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9097818b6cc1cfb5f174e3263eba4a62a17683bcfe5c4b5d07f4c97fa51fbf28"},
-    {file = "coverage-7.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0018f73dfb4301a89292c73be6ba5f58722ff79f51593352759c1790ded1cabe"},
-    {file = "coverage-7.13.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:166ad2a22ee770f5656e1257703139d3533b4a0b6909af67c6b4a3adc1c98657"},
-    {file = "coverage-7.13.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f6aaef16d65d1787280943f1c8718dc32e9cf141014e4634d64446702d26e0ff"},
-    {file = "coverage-7.13.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e999e2dcc094002d6e2c7bbc1fb85b58ba4f465a760a8014d97619330cdbbbf3"},
-    {file = "coverage-7.13.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:00c3d22cf6fb1cf3bf662aaaa4e563be8243a5ed2630339069799835a9cc7f9b"},
-    {file = "coverage-7.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22ccfe8d9bb0d6134892cbe1262493a8c70d736b9df930f3f3afae0fe3ac924d"},
-    {file = "coverage-7.13.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9372dff5ea15930fea0445eaf37bbbafbc771a49e70c0aeed8b4e2c2614cc00e"},
-    {file = "coverage-7.13.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:69ac2c492918c2461bc6ace42d0479638e60719f2a4ef3f0815fa2df88e9f940"},
-    {file = "coverage-7.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:739c6c051a7540608d097b8e13c76cfa85263ced467168dc6b477bae3df7d0e2"},
-    {file = "coverage-7.13.0-cp314-cp314t-win32.whl", hash = "sha256:fe81055d8c6c9de76d60c94ddea73c290b416e061d40d542b24a5871bad498b7"},
-    {file = "coverage-7.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:445badb539005283825959ac9fa4a28f712c214b65af3a2c464f1adc90f5fcbc"},
-    {file = "coverage-7.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:de7f6748b890708578fc4b7bb967d810aeb6fcc9bff4bb77dbca77dab2f9df6a"},
-    {file = "coverage-7.13.0-py3-none-any.whl", hash = "sha256:850d2998f380b1e266459ca5b47bc9e7daf9af1d070f66317972f382d46f1904"},
-    {file = "coverage-7.13.0.tar.gz", hash = "sha256:a394aa27f2d7ff9bc04cf703817773a59ad6dfbd577032e690f961d2460ee936"},
+    {file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"},
+    {file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"},
+    {file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"},
+    {file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"},
+    {file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"},
+    {file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"},
+    {file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"},
+    {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"},
+    {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"},
+    {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"},
+    {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"},
+    {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"},
+    {file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"},
+    {file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"},
+    {file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"},
+    {file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"},
+    {file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"},
+    {file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"},
+    {file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"},
+    {file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"},
+    {file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"},
+    {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"},
+    {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"},
+    {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"},
+    {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"},
+    {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"},
+    {file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"},
+    {file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"},
+    {file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"},
+    {file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"},
+    {file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"},
+    {file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"},
+    {file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"},
+    {file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"},
+    {file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"},
+    {file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"},
+    {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"},
+    {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"},
+    {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"},
+    {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"},
+    {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"},
+    {file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"},
+    {file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"},
+    {file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"},
+    {file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"},
+    {file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"},
+    {file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"},
+    {file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"},
+    {file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"},
+    {file = "coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"},
+    {file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"},
+    {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"},
+    {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"},
+    {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"},
+    {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"},
+    {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"},
+    {file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"},
+    {file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"},
+    {file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"},
+    {file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"},
+    {file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"},
+    {file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"},
+    {file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"},
+    {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"},
+    {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"},
+    {file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"},
+    {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"},
+    {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"},
+    {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"},
+    {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"},
+    {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"},
+    {file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"},
+    {file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"},
+    {file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"},
+    {file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"},
+    {file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"},
+    {file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"},
+    {file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"},
+    {file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"},
+    {file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"},
+    {file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"},
+    {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"},
+    {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"},
+    {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"},
+    {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"},
+    {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"},
+    {file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"},
+    {file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"},
+    {file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"},
+    {file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"},
+    {file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"},
+    {file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"},
+    {file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"},
+    {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"},
+    {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"},
+    {file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"},
+    {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"},
+    {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"},
+    {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"},
+    {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"},
+    {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"},
+    {file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"},
+    {file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"},
+    {file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"},
+    {file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"},
+    {file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"},
 ]
 
 [package.extras]
@@ -240,17 +308,20 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
 
 [[package]]
 name = "exceptiongroup"
-version = "1.1.1"
+version = "1.3.1"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 groups = ["dev"]
 markers = "python_version == \"3.10\""
 files = [
-    {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
-    {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
+    {file = "exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"},
+    {file = "exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219"},
 ]
 
+[package.dependencies]
+typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""}
+
 [package.extras]
 test = ["pytest (>=6)"]
 
@@ -271,26 +342,26 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2
 
 [[package]]
 name = "iniconfig"
-version = "2.0.0"
+version = "2.3.0"
 description = "brain-dead simple config-ini parsing"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.10"
 groups = ["dev"]
 files = [
-    {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
-    {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
+    {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
+    {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
 ]
 
 [[package]]
 name = "packaging"
-version = "23.1"
+version = "26.0"
 description = "Core utilities for Python packages"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 groups = ["dev"]
 files = [
-    {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
-    {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
+    {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"},
+    {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"},
 ]
 
 [[package]]
@@ -311,14 +382,14 @@ testing = ["coverage", "pytest", "pytest-benchmark"]
 
 [[package]]
 name = "pygments"
-version = "2.19.2"
+version = "2.20.0"
 description = "Pygments is a syntax highlighting package written in Python."
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 groups = ["dev"]
 files = [
-    {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
-    {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
+    {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"},
+    {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"},
 ]
 
 [package.extras]
@@ -326,20 +397,21 @@ windows-terminal = ["colorama (>=0.4.6)"]
 
 [[package]]
 name = "pymupdf"
-version = "1.26.7"
+version = "1.27.2.2"
 description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
 optional = false
 python-versions = ">=3.10"
 groups = ["main"]
 files = [
-    {file = "pymupdf-1.26.7-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:07085718dfdae5ab83b05eb5eb397f863bcc538fe05135318a01ea353e7a1353"},
-    {file = "pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:31aa9c8377ea1eea02934b92f4dcf79fb2abba0bf41f8a46d64c3e31546a3c02"},
-    {file = "pymupdf-1.26.7-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e419b609996434a14a80fa060adec72c434a1cca6a511ec54db9841bc5d51b3c"},
-    {file = "pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:69dfc78f206a96e5b3ac22741263ebab945fdf51f0dbe7c5757c3511b23d9d72"},
-    {file = "pymupdf-1.26.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1d5106f46e1ca0d64d46bd51892372a4f82076bdc14a9678d33d630702abca36"},
-    {file = "pymupdf-1.26.7-cp310-abi3-win32.whl", hash = "sha256:7c9645b6f5452629c747690190350213d3e5bbdb6b2eca227d82702b327f6eee"},
-    {file = "pymupdf-1.26.7-cp310-abi3-win_amd64.whl", hash = "sha256:425b1befe40d41b72eb0fe211711c7ae334db5eb60307e9dd09066ed060cceba"},
-    {file = "pymupdf-1.26.7.tar.gz", hash = "sha256:71add8bdc8eb1aaa207c69a13400693f06ad9b927bea976f5d5ab9df0bb489c3"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:800f43e60a6f01f644343c2213b8613db02eaf4f4ba235b417b3351fa99e01c0"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2e4299ef1ac0c9dff9be096cbd22783699673abecfa7c3f73173ae06421d73"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5e3d54922db1c7da844f1208ac1db05704770988752311f81dd36694ae0a07b"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:892698c9768457eb0991c102c96a856c0a7062539371df5e6bee0816f3ef498e"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b4bbfa6ef347fade678771a93f6364971c51a2cdc44cd2400dc4eeed1ddb4e6"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-win32.whl", hash = "sha256:0b8e924433b7e0bd46be820899300259235997d5a747638471fb2762baa8ee30"},
+    {file = "pymupdf-1.27.2.2-cp310-abi3-win_amd64.whl", hash = "sha256:09bb53f9486ccb5297030cbc2dbdae845ba1c3c5126e96eb2d16c4f118de0b5b"},
+    {file = "pymupdf-1.27.2.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6cebfbbdfd219ebdebf4d8e3914624b2e3d3a844c43f4f76935822dd9b13cc12"},
+    {file = "pymupdf-1.27.2.2.tar.gz", hash = "sha256:ea8fdc3ab6671ca98f629d5ec3032d662c8cf1796b146996b7ad306ac7ed3335"},
 ]
 
 [[package]]
@@ -451,25 +523,25 @@ files = [
 
 [[package]]
 name = "requests"
-version = "2.32.5"
+version = "2.33.1"
 description = "Python HTTP for Humans."
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.10"
 groups = ["main", "dev"]
 files = [
-    {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"},
-    {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"},
+    {file = "requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a"},
+    {file = "requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517"},
 ]
 
 [package.dependencies]
-certifi = ">=2017.4.17"
+certifi = ">=2023.5.7"
 charset_normalizer = ">=2,<4"
 idna = ">=2.5,<4"
-urllib3 = ">=1.21.1,<3"
+urllib3 = ">=1.26,<3"
 
 [package.extras]
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
-use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<8)"]
 
 [[package]]
 name = "responses"
@@ -493,39 +565,84 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy
 
 [[package]]
 name = "soupsieve"
-version = "2.4.1"
+version = "2.8.3"
 description = "A modern CSS selector implementation for Beautiful Soup."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"},
-    {file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"},
+    {file = "soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95"},
+    {file = "soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349"},
 ]
 
 [[package]]
 name = "tomli"
-version = "2.0.1"
+version = "2.4.1"
 description = "A lil' TOML parser"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 groups = ["dev"]
 markers = "python_version == \"3.10\""
 files = [
-    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
-    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+    {file = "tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30"},
+    {file = "tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a"},
+    {file = "tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076"},
+    {file = "tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9"},
+    {file = "tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c"},
+    {file = "tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc"},
+    {file = "tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049"},
+    {file = "tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e"},
+    {file = "tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece"},
+    {file = "tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a"},
+    {file = "tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085"},
+    {file = "tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9"},
+    {file = "tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5"},
+    {file = "tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585"},
+    {file = "tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1"},
+    {file = "tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917"},
+    {file = "tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9"},
+    {file = "tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257"},
+    {file = "tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54"},
+    {file = "tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a"},
+    {file = "tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897"},
+    {file = "tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f"},
+    {file = "tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d"},
+    {file = "tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5"},
+    {file = "tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd"},
+    {file = "tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36"},
+    {file = "tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd"},
+    {file = "tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf"},
+    {file = "tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac"},
+    {file = "tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662"},
+    {file = "tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853"},
+    {file = "tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15"},
+    {file = "tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba"},
+    {file = "tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6"},
+    {file = "tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7"},
+    {file = "tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232"},
+    {file = "tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4"},
+    {file = "tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c"},
+    {file = "tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d"},
+    {file = "tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41"},
+    {file = "tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c"},
+    {file = "tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f"},
+    {file = "tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8"},
+    {file = "tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26"},
+    {file = "tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396"},
+    {file = "tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe"},
+    {file = "tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f"},
 ]
 
 [[package]]
 name = "tqdm"
-version = "4.67.1"
+version = "4.67.3"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
-    {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
+    {file = "tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf"},
+    {file = "tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb"},
 ]
 
 [package.dependencies]
@@ -544,11 +661,12 @@ version = "4.15.0"
 description = "Backported and Experimental Type Hints for Python 3.9+"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
     {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
 ]
+markers = {dev = "python_version == \"3.10\""}
 
 [[package]]
 name = "urllib3"

From 9edf5f29286045ece5d406f4483b6ed0abb1ecad Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:33:37 +0000
Subject: [PATCH 3/6] Add 5 major enhancements: concurrent downloads, catalog,
 watch mode, MCP server, site schemas

Enhancement 1 - Concurrent Downloads:
  - New async_downloader.py with ThreadPoolExecutor-based parallel downloads
  - Thread-safe rate limiter shared across workers
  - --concurrent and --max-workers CLI flags
  - Backward compatible: sequential remains default

Enhancement 2 - Persistent Document Catalog:
  - New catalog.py with SQLite-backed DocumentCatalog
  - Content-hash-based change detection and cross-URL deduplication
  - Run history tracking with diff summaries
  - Export as JSON/CSV, search by URL/filename
  - CLI: fetcharoo catalog {show|export|search|runs|duplicates}

Enhancement 3 - Watch Mode:
  - New watcher.py and notifications.py
  - One-shot diff: fetcharoo diff <url> (cron-friendly)
  - Continuous watch: fetcharoo watch <url> --interval 3600
  - Notifications: stdout, JSON, webhook, shell command
  - Git-like diff output: + new, ~ changed, - removed

Enhancement 4 - MCP Server:
  - New mcp_server.py exposing stateful tools via FastMCP
  - Tools: discover_pdfs, download_pdfs, catalog_query, catalog_diff,
    catalog_search, get_document_metadata, find_duplicate_documents
  - Optional dependency: pip install fetcharoo[mcp]
  - CLI: fetcharoo mcp serve

Enhancement 5 - Community Site Schemas:
  - 5 built-in schemas: arxiv, ietf_rfc, sec_edgar, w3c, federal_register
  - Auto-detection: --schema auto matches URL to schema
  - find_schema() and list_schemas() API
  - CLI: fetcharoo schemas {list|match}

All 337 tests pass (276 existing + 61 new).

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 fetcharoo/__init__.py                       |  22 +-
 fetcharoo/async_downloader.py               | 100 ++++
 fetcharoo/catalog.py                        | 518 ++++++++++++++++++++
 fetcharoo/cli.py                            | 394 ++++++++++++++-
 fetcharoo/fetcharoo.py                      |  44 +-
 fetcharoo/mcp_server.py                     | 303 ++++++++++++
 fetcharoo/notifications.py                  | 166 +++++++
 fetcharoo/schemas/__init__.py               |  35 +-
 fetcharoo/schemas/sites/__init__.py         |  29 ++
 fetcharoo/schemas/sites/arxiv.py            |  20 +
 fetcharoo/schemas/sites/federal_register.py |  18 +
 fetcharoo/schemas/sites/ietf_rfc.py         |  20 +
 fetcharoo/schemas/sites/sec_edgar.py        |  20 +
 fetcharoo/schemas/sites/w3c.py              |  18 +
 fetcharoo/watcher.py                        | 202 ++++++++
 pyproject.toml                              |   4 +-
 tests/test_async_downloader.py              |  85 ++++
 tests/test_catalog.py                       | 209 ++++++++
 tests/test_schemas_registry.py              |  90 ++++
 tests/test_watcher.py                       | 148 ++++++
 20 files changed, 2425 insertions(+), 20 deletions(-)
 create mode 100644 fetcharoo/async_downloader.py
 create mode 100644 fetcharoo/catalog.py
 create mode 100644 fetcharoo/mcp_server.py
 create mode 100644 fetcharoo/notifications.py
 create mode 100644 fetcharoo/schemas/sites/__init__.py
 create mode 100644 fetcharoo/schemas/sites/arxiv.py
 create mode 100644 fetcharoo/schemas/sites/federal_register.py
 create mode 100644 fetcharoo/schemas/sites/ietf_rfc.py
 create mode 100644 fetcharoo/schemas/sites/sec_edgar.py
 create mode 100644 fetcharoo/schemas/sites/w3c.py
 create mode 100644 fetcharoo/watcher.py
 create mode 100644 tests/test_async_downloader.py
 create mode 100644 tests/test_catalog.py
 create mode 100644 tests/test_schemas_registry.py
 create mode 100644 tests/test_watcher.py

diff --git a/fetcharoo/__init__.py b/fetcharoo/__init__.py
index 1fbeb7c..11ef18d 100644
--- a/fetcharoo/__init__.py
+++ b/fetcharoo/__init__.py
@@ -2,7 +2,8 @@
 fetcharoo - A Python library for downloading PDF files from webpages.
 
 This library provides tools for finding and downloading PDF files from webpages,
-with support for recursive link following, PDF merging, and configurable options.
+with support for recursive link following, PDF merging, concurrent downloads,
+persistent document tracking, change monitoring, and configurable options.
 """
 
 from fetcharoo.fetcharoo import (
@@ -20,6 +21,7 @@
 )
 from fetcharoo.pdf_utils import merge_pdfs, save_pdf_to_file
 from fetcharoo.downloader import download_pdf
+from fetcharoo.async_downloader import download_pdfs_concurrent
 from fetcharoo.file_utils import check_file_exists, check_pdf_exists
 from fetcharoo.filtering import (
     FilterConfig,
@@ -29,8 +31,11 @@
     apply_filters,
     should_download_pdf,
 )
+from fetcharoo.catalog import DocumentCatalog, DocumentRecord, DiffResult
+from fetcharoo.watcher import DocumentWatcher, diff_once
+from fetcharoo.schemas import SiteSchema, find_schema, list_schemas
 
-__version__ = "0.1.0"
+__version__ = "0.3.0"
 
 __all__ = [
     # Main API
@@ -41,6 +46,8 @@
     "merge_pdfs",
     "save_pdf_to_file",
     "download_pdf",
+    # Concurrent downloads
+    "download_pdfs_concurrent",
     # File utilities
     "check_file_exists",
     "check_pdf_exists",
@@ -63,6 +70,17 @@
     "matches_url_pattern",
     "apply_filters",
     "should_download_pdf",
+    # Catalog
+    "DocumentCatalog",
+    "DocumentRecord",
+    "DiffResult",
+    # Watcher
+    "DocumentWatcher",
+    "diff_once",
+    # Schemas
+    "SiteSchema",
+    "find_schema",
+    "list_schemas",
     # Version
     "__version__",
 ]
diff --git a/fetcharoo/async_downloader.py b/fetcharoo/async_downloader.py
new file mode 100644
index 0000000..454b75d
--- /dev/null
+++ b/fetcharoo/async_downloader.py
@@ -0,0 +1,100 @@
+"""
+Concurrent PDF downloading for fetcharoo.
+
+This module provides parallel download capabilities using ThreadPoolExecutor,
+allowing multiple PDFs to be downloaded simultaneously with configurable
+concurrency limits and shared rate limiting.
+"""
+
+import logging
+import time
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Dict, List, Optional, Tuple
+
+from fetcharoo.downloader import download_pdf
+
+logger = logging.getLogger('fetcharoo')
+
+
+class RateLimiter:
+    """Thread-safe rate limiter using token bucket algorithm."""
+
+    def __init__(self, min_interval: float = 0.5):
+        """
+        Args:
+            min_interval: Minimum seconds between requests.
+        """
+        self._min_interval = min_interval
+        self._last_request = 0.0
+        self._lock = threading.Lock()
+
+    def wait(self) -> None:
+        """Block until enough time has passed since the last request."""
+        with self._lock:
+            now = time.monotonic()
+            elapsed = now - self._last_request
+            if elapsed < self._min_interval:
+                time.sleep(self._min_interval - elapsed)
+            self._last_request = time.monotonic()
+
+
+def download_pdfs_concurrent(
+    pdf_links: List[str],
+    max_workers: int = 5,
+    timeout: int = 30,
+    user_agent: Optional[str] = None,
+    request_delay: float = 0.1,
+    progress_callback: Optional[callable] = None,
+) -> List[Tuple[Optional[bytes], str]]:
+    """
+    Download multiple PDFs concurrently using a thread pool.
+
+    Args:
+        pdf_links: List of PDF URLs to download.
+        max_workers: Maximum number of concurrent download threads.
+        timeout: Request timeout in seconds per download.
+        user_agent: Custom User-Agent string.
+        request_delay: Minimum delay between requests (shared across workers).
+        progress_callback: Optional callable invoked after each download completes.
+                          Called with no arguments.
+
+    Returns:
+        List of (content, url) tuples in the same order as pdf_links.
+        content is bytes on success or None on failure.
+    """
+    if not pdf_links:
+        return []
+
+    rate_limiter = RateLimiter(min_interval=request_delay)
+    results: Dict[int, Tuple[Optional[bytes], str]] = {}
+
+    def _download_one(index: int, url: str) -> Tuple[int, Optional[bytes], str]:
+        rate_limiter.wait()
+        content = download_pdf(url, timeout=timeout, user_agent=user_agent)
+        return index, content, url
+
+    # Cap workers to number of links
+    actual_workers = min(max_workers, len(pdf_links))
+
+    with ThreadPoolExecutor(max_workers=actual_workers) as executor:
+        futures = {
+            executor.submit(_download_one, i, url): i
+            for i, url in enumerate(pdf_links)
+        }
+
+        for future in as_completed(futures):
+            try:
+                index, content, url = future.result()
+                results[index] = (content, url)
+            except Exception as e:
+                idx = futures[future]
+                url = pdf_links[idx]
+                logger.error(f"Unexpected error downloading {url}: {e}")
+                results[idx] = (None, url)
+
+            if progress_callback:
+                progress_callback()
+
+    # Return in original order
+    return [results[i] for i in range(len(pdf_links))]
diff --git a/fetcharoo/catalog.py b/fetcharoo/catalog.py
new file mode 100644
index 0000000..196fc96
--- /dev/null
+++ b/fetcharoo/catalog.py
@@ -0,0 +1,518 @@
+"""
+Persistent document catalog for fetcharoo.
+
+Tracks every PDF fetcharoo has ever seen across runs using SQLite.
+Provides content-hash-based change detection, cross-URL deduplication,
+run history, and metadata extraction.
+"""
+
+import csv
+import hashlib
+import io
+import json
+import logging
+import os
+import sqlite3
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+import pymupdf
+
+logger = logging.getLogger('fetcharoo')
+
+# Current schema version for migrations
+CATALOG_SCHEMA_VERSION = 1
+
+
+@dataclass
+class DocumentRecord:
+    """A single document tracked in the catalog."""
+    id: str
+    url: str
+    filename: Optional[str] = None
+    content_hash: Optional[str] = None
+    size_bytes: Optional[int] = None
+    first_seen: Optional[str] = None
+    last_seen: Optional[str] = None
+    last_changed: Optional[str] = None
+    status: str = 'active'
+    source_page: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)
+
+
+@dataclass
+class RunRecord:
+    """Summary of a single catalog run."""
+    id: Optional[int] = None
+    url: str = ''
+    timestamp: Optional[str] = None
+    documents_found: int = 0
+    documents_new: int = 0
+    documents_changed: int = 0
+    documents_removed: int = 0
+
+
+@dataclass
+class DiffResult:
+    """Result of comparing current state against catalog."""
+    new: List[DocumentRecord] = field(default_factory=list)
+    changed: List[DocumentRecord] = field(default_factory=list)
+    removed: List[DocumentRecord] = field(default_factory=list)
+    unchanged: List[DocumentRecord] = field(default_factory=list)
+
+
+def _url_id(url: str) -> str:
+    """Generate a deterministic ID for a URL."""
+    return hashlib.sha256(url.encode('utf-8')).hexdigest()[:16]
+
+
+def _content_hash(content: bytes) -> str:
+    """Generate a SHA-256 hash of PDF content."""
+    return hashlib.sha256(content).hexdigest()
+
+
+def _now_iso() -> str:
+    """Return current UTC time as ISO 8601 string."""
+    return datetime.now(timezone.utc).isoformat()
+
+
+def extract_pdf_metadata(content: bytes) -> Dict[str, Any]:
+    """
+    Extract metadata from PDF content using PyMuPDF.
+
+    Args:
+        content: Raw PDF bytes.
+
+    Returns:
+        Dict with keys like title, author, page_count, creation_date.
+    """
+    metadata: Dict[str, Any] = {}
+    try:
+        doc = pymupdf.Document(stream=content, filetype="pdf")
+        meta = doc.metadata or {}
+        metadata['title'] = meta.get('title', '') or ''
+        metadata['author'] = meta.get('author', '') or ''
+        metadata['subject'] = meta.get('subject', '') or ''
+        metadata['creator'] = meta.get('creator', '') or ''
+        metadata['producer'] = meta.get('producer', '') or ''
+        metadata['creation_date'] = meta.get('creationDate', '') or ''
+        metadata['page_count'] = doc.page_count
+        doc.close()
+    except Exception as e:
+        logger.debug(f"Could not extract PDF metadata: {e}")
+    return metadata
+
+
+class DocumentCatalog:
+    """
+    SQLite-backed persistent document catalog.
+
+    Tracks documents across runs with content-hash-based change detection.
+    """
+
+    def __init__(self, db_path: Optional[str] = None):
+        """
+        Args:
+            db_path: Path to SQLite database file. Defaults to ~/.fetcharoo/catalog.db
+        """
+        if db_path is None:
+            catalog_dir = os.path.join(os.path.expanduser('~'), '.fetcharoo')
+            os.makedirs(catalog_dir, exist_ok=True)
+            db_path = os.path.join(catalog_dir, 'catalog.db')
+
+        self.db_path = db_path
+        self._conn = sqlite3.connect(db_path)
+        self._conn.row_factory = sqlite3.Row
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        """Create tables if they don't exist."""
+        with self._conn:
+            self._conn.executescript("""
+                CREATE TABLE IF NOT EXISTS documents (
+                    id TEXT PRIMARY KEY,
+                    url TEXT NOT NULL,
+                    filename TEXT,
+                    content_hash TEXT,
+                    size_bytes INTEGER,
+                    first_seen TEXT,
+                    last_seen TEXT,
+                    last_changed TEXT,
+                    status TEXT DEFAULT 'active',
+                    source_page TEXT,
+                    metadata TEXT DEFAULT '{}'
+                );
+
+                CREATE TABLE IF NOT EXISTS runs (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    url TEXT,
+                    timestamp TEXT,
+                    documents_found INTEGER DEFAULT 0,
+                    documents_new INTEGER DEFAULT 0,
+                    documents_changed INTEGER DEFAULT 0,
+                    documents_removed INTEGER DEFAULT 0
+                );
+
+                CREATE INDEX IF NOT EXISTS idx_documents_url ON documents(url);
+                CREATE INDEX IF NOT EXISTS idx_documents_content_hash ON documents(content_hash);
+                CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
+                CREATE INDEX IF NOT EXISTS idx_runs_url ON runs(url);
+            """)
+
+    def upsert_document(
+        self,
+        url: str,
+        content: Optional[bytes] = None,
+        source_page: Optional[str] = None,
+        filename: Optional[str] = None,
+    ) -> DocumentRecord:
+        """
+        Insert or update a document in the catalog.
+
+        Args:
+            url: The document URL.
+            content: Raw PDF bytes (for hashing and metadata extraction).
+            source_page: The page where this PDF was discovered.
+            filename: The PDF filename.
+
+        Returns:
+            The upserted DocumentRecord.
+        """
+        doc_id = _url_id(url)
+        now = _now_iso()
+        c_hash = _content_hash(content) if content else None
+        size = len(content) if content else None
+        metadata = extract_pdf_metadata(content) if content else {}
+
+        existing = self._get_raw(doc_id)
+        if existing is None:
+            # New document
+            record = DocumentRecord(
+                id=doc_id,
+                url=url,
+                filename=filename,
+                content_hash=c_hash,
+                size_bytes=size,
+                first_seen=now,
+                last_seen=now,
+                last_changed=now,
+                status='active',
+                source_page=source_page,
+                metadata=metadata,
+            )
+            self._insert(record)
+        else:
+            # Existing document — check for changes
+            old_hash = existing['content_hash']
+            changed = c_hash is not None and old_hash != c_hash
+            record = DocumentRecord(
+                id=doc_id,
+                url=url,
+                filename=filename or existing['filename'],
+                content_hash=c_hash or existing['content_hash'],
+                size_bytes=size if size is not None else existing['size_bytes'],
+                first_seen=existing['first_seen'],
+                last_seen=now,
+                last_changed=now if changed else existing['last_changed'],
+                status='active',
+                source_page=source_page or existing['source_page'],
+                metadata=metadata or json.loads(existing['metadata'] or '{}'),
+            )
+            self._update(record)
+
+        return record
+
+    def record_discovery(
+        self,
+        url: str,
+        source_page: Optional[str] = None,
+        filename: Optional[str] = None,
+    ) -> DocumentRecord:
+        """
+        Record that a PDF URL was discovered (without downloading content).
+
+        Args:
+            url: The document URL.
+            source_page: The page where this PDF was discovered.
+            filename: The PDF filename.
+
+        Returns:
+            The DocumentRecord.
+        """
+        return self.upsert_document(url, content=None, source_page=source_page, filename=filename)
+
+    def mark_removed(self, url: str) -> None:
+        """Mark a document as removed (no longer found at its URL)."""
+        doc_id = _url_id(url)
+        with self._conn:
+            self._conn.execute(
+                "UPDATE documents SET status = 'removed', last_seen = ? WHERE id = ?",
+                (_now_iso(), doc_id)
+            )
+
+    def get_document(self, url: str) -> Optional[DocumentRecord]:
+        """Get a document record by URL."""
+        doc_id = _url_id(url)
+        row = self._get_raw(doc_id)
+        if row is None:
+            return None
+        return self._row_to_record(row)
+
+    def get_active_documents(self, source_page: Optional[str] = None) -> List[DocumentRecord]:
+        """
+        Get all active documents, optionally filtered by source page.
+
+        Args:
+            source_page: If provided, only return documents from this source page.
+        """
+        if source_page:
+            rows = self._conn.execute(
+                "SELECT * FROM documents WHERE status = 'active' AND source_page = ? ORDER BY url",
+                (source_page,)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM documents WHERE status = 'active' ORDER BY url"
+            ).fetchall()
+        return [self._row_to_record(r) for r in rows]
+
+    def get_all_documents(self) -> List[DocumentRecord]:
+        """Get all documents regardless of status."""
+        rows = self._conn.execute(
+            "SELECT * FROM documents ORDER BY url"
+        ).fetchall()
+        return [self._row_to_record(r) for r in rows]
+
+    def find_duplicates(self) -> Dict[str, List[DocumentRecord]]:
+        """
+        Find documents with the same content hash but different URLs.
+
+        Returns:
+            Dict mapping content_hash to list of DocumentRecords sharing that hash.
+        """
+        rows = self._conn.execute("""
+            SELECT content_hash, COUNT(*) as cnt FROM documents
+            WHERE content_hash IS NOT NULL AND status = 'active'
+            GROUP BY content_hash HAVING cnt > 1
+        """).fetchall()
+
+        duplicates: Dict[str, List[DocumentRecord]] = {}
+        for row in rows:
+            hash_val = row['content_hash']
+            doc_rows = self._conn.execute(
+                "SELECT * FROM documents WHERE content_hash = ? AND status = 'active'",
+                (hash_val,)
+            ).fetchall()
+            duplicates[hash_val] = [self._row_to_record(r) for r in doc_rows]
+        return duplicates
+
+    def search(self, query: str) -> List[DocumentRecord]:
+        """
+        Search documents by URL or filename substring.
+
+        Args:
+            query: Search string (matched against URL and filename).
+        """
+        pattern = f"%{query}%"
+        rows = self._conn.execute(
+            "SELECT * FROM documents WHERE (url LIKE ? OR filename LIKE ?) ORDER BY url",
+            (pattern, pattern)
+        ).fetchall()
+        return [self._row_to_record(r) for r in rows]
+
+    def diff(self, current_urls: List[str]) -> DiffResult:
+        """
+        Compare a list of currently discovered URLs against the catalog.
+
+        Args:
+            current_urls: URLs found in the current crawl.
+
+        Returns:
+            DiffResult with new, changed, removed, and unchanged documents.
+        """
+        result = DiffResult()
+        current_set = set(current_urls)
+        known_docs = {doc.url: doc for doc in self.get_active_documents()}
+
+        for url in current_urls:
+            if url in known_docs:
+                result.unchanged.append(known_docs[url])
+            else:
+                result.new.append(DocumentRecord(
+                    id=_url_id(url),
+                    url=url,
+                    status='new',
+                ))
+
+        for url, doc in known_docs.items():
+            if url not in current_set:
+                result.removed.append(doc)
+
+        return result
+
+    def record_run(self, url: str, diff: DiffResult) -> RunRecord:
+        """
+        Record a run in the catalog.
+
+        Args:
+            url: The source URL that was crawled.
+            diff: The DiffResult from this run.
+
+        Returns:
+            The RunRecord.
+        """
+        run = RunRecord(
+            url=url,
+            timestamp=_now_iso(),
+            documents_found=len(diff.new) + len(diff.unchanged) + len(diff.changed),
+            documents_new=len(diff.new),
+            documents_changed=len(diff.changed),
+            documents_removed=len(diff.removed),
+        )
+        with self._conn:
+            cursor = self._conn.execute(
+                "INSERT INTO runs (url, timestamp, documents_found, documents_new, documents_changed, documents_removed) VALUES (?, ?, ?, ?, ?, ?)",
+                (run.url, run.timestamp, run.documents_found, run.documents_new, run.documents_changed, run.documents_removed)
+            )
+            run.id = cursor.lastrowid
+        return run
+
+    def get_runs(self, url: Optional[str] = None, limit: int = 20) -> List[RunRecord]:
+        """Get recent runs, optionally filtered by URL."""
+        if url:
+            rows = self._conn.execute(
+                "SELECT * FROM runs WHERE url = ? ORDER BY timestamp DESC LIMIT ?",
+                (url, limit)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM runs ORDER BY timestamp DESC LIMIT ?",
+                (limit,)
+            ).fetchall()
+        return [
+            RunRecord(
+                id=r['id'], url=r['url'], timestamp=r['timestamp'],
+                documents_found=r['documents_found'], documents_new=r['documents_new'],
+                documents_changed=r['documents_changed'], documents_removed=r['documents_removed']
+            )
+            for r in rows
+        ]
+
+    def export_json(self) -> str:
+        """Export the entire catalog as a JSON string."""
+        docs = self.get_all_documents()
+        data = []
+        for doc in docs:
+            d = {
+                'id': doc.id,
+                'url': doc.url,
+                'filename': doc.filename,
+                'content_hash': doc.content_hash,
+                'size_bytes': doc.size_bytes,
+                'first_seen': doc.first_seen,
+                'last_seen': doc.last_seen,
+                'last_changed': doc.last_changed,
+                'status': doc.status,
+                'source_page': doc.source_page,
+                'metadata': doc.metadata,
+            }
+            data.append(d)
+        return json.dumps(data, indent=2)
+
+    def export_csv(self) -> str:
+        """Export the entire catalog as a CSV string."""
+        docs = self.get_all_documents()
+        output = io.StringIO()
+        fieldnames = [
+            'id', 'url', 'filename', 'content_hash', 'size_bytes',
+            'first_seen', 'last_seen', 'last_changed', 'status',
+            'source_page', 'page_count', 'title', 'author'
+        ]
+        writer = csv.DictWriter(output, fieldnames=fieldnames)
+        writer.writeheader()
+        for doc in docs:
+            meta = doc.metadata or {}
+            writer.writerow({
+                'id': doc.id,
+                'url': doc.url,
+                'filename': doc.filename,
+                'content_hash': doc.content_hash,
+                'size_bytes': doc.size_bytes,
+                'first_seen': doc.first_seen,
+                'last_seen': doc.last_seen,
+                'last_changed': doc.last_changed,
+                'status': doc.status,
+                'source_page': doc.source_page,
+                'page_count': meta.get('page_count', ''),
+                'title': meta.get('title', ''),
+                'author': meta.get('author', ''),
+            })
+        return output.getvalue()
+
+    def close(self) -> None:
+        """Close the database connection."""
+        self._conn.close()
+
+    @property
+    def document_count(self) -> int:
+        """Total number of documents in the catalog."""
+        row = self._conn.execute("SELECT COUNT(*) as cnt FROM documents").fetchone()
+        return row['cnt']
+
+    @property
+    def active_count(self) -> int:
+        """Number of active documents in the catalog."""
+        row = self._conn.execute(
+            "SELECT COUNT(*) as cnt FROM documents WHERE status = 'active'"
+        ).fetchone()
+        return row['cnt']
+
+    # --- Internal helpers ---
+
+    def _get_raw(self, doc_id: str) -> Optional[sqlite3.Row]:
+        return self._conn.execute(
+            "SELECT * FROM documents WHERE id = ?", (doc_id,)
+        ).fetchone()
+
+    def _insert(self, record: DocumentRecord) -> None:
+        with self._conn:
+            self._conn.execute(
+                """INSERT INTO documents
+                   (id, url, filename, content_hash, size_bytes, first_seen,
+                    last_seen, last_changed, status, source_page, metadata)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                (record.id, record.url, record.filename, record.content_hash,
+                 record.size_bytes, record.first_seen, record.last_seen,
+                 record.last_changed, record.status, record.source_page,
+                 json.dumps(record.metadata or {}))
+            )
+
+    def _update(self, record: DocumentRecord) -> None:
+        with self._conn:
+            self._conn.execute(
+                """UPDATE documents SET
+                   url=?, filename=?, content_hash=?, size_bytes=?,
+                   last_seen=?, last_changed=?, status=?, source_page=?, metadata=?
+                   WHERE id=?""",
+                (record.url, record.filename, record.content_hash,
+                 record.size_bytes, record.last_seen, record.last_changed,
+                 record.status, record.source_page,
+                 json.dumps(record.metadata or {}), record.id)
+            )
+
+    def _row_to_record(self, row: sqlite3.Row) -> DocumentRecord:
+        meta = json.loads(row['metadata'] or '{}')
+        return DocumentRecord(
+            id=row['id'],
+            url=row['url'],
+            filename=row['filename'],
+            content_hash=row['content_hash'],
+            size_bytes=row['size_bytes'],
+            first_seen=row['first_seen'],
+            last_seen=row['last_seen'],
+            last_changed=row['last_changed'],
+            status=row['status'],
+            source_page=row['source_page'],
+            metadata=meta,
+        )
diff --git a/fetcharoo/cli.py b/fetcharoo/cli.py
index bdf5016..4209d51 100644
--- a/fetcharoo/cli.py
+++ b/fetcharoo/cli.py
@@ -17,6 +17,9 @@
 )
 from fetcharoo.filtering import FilterConfig
 
+# Subcommands that the CLI recognizes
+SUBCOMMANDS = {'diff', 'watch', 'catalog', 'schemas', 'mcp'}
+
 
 def configure_logging(quiet: int, verbose: int) -> None:
     """
@@ -53,9 +56,9 @@ def configure_logging(quiet: int, verbose: int) -> None:
     logger.setLevel(level)
 
 
-def create_parser() -> argparse.ArgumentParser:
+def create_download_parser() -> argparse.ArgumentParser:
     """
-    Create and configure the argument parser for the CLI.
+    Create the argument parser for the default download command.
 
     Returns:
         Configured ArgumentParser instance.
@@ -77,6 +80,17 @@ def create_parser() -> argparse.ArgumentParser:
 
   # Download to custom directory with custom delay
   fetcharoo https://example.com -o my_pdfs --delay 1.0
+
+  # Parallel download with 10 workers
+  fetcharoo https://example.com --concurrent --max-workers 10
+
+Subcommands:
+  fetcharoo diff <url>          Check for new/changed PDFs since last run
+  fetcharoo watch <url>         Continuously monitor a URL for changes
+  fetcharoo catalog show        View tracked documents
+  fetcharoo catalog export      Export catalog as JSON/CSV
+  fetcharoo schemas list        List available site schemas
+  fetcharoo mcp serve           Start the MCP server
         """
     )
 
@@ -158,6 +172,43 @@ def create_parser() -> argparse.ArgumentParser:
         help='show progress bars during download'
     )
 
+    # Concurrent download options
+    parser.add_argument(
+        '--concurrent',
+        action='store_true',
+        help='download PDFs in parallel using multiple threads'
+    )
+
+    parser.add_argument(
+        '--max-workers',
+        type=int,
+        default=5,
+        metavar='N',
+        help='maximum concurrent download threads (default: 5, used with --concurrent)'
+    )
+
+    # Catalog option
+    parser.add_argument(
+        '--catalog',
+        action='store_true',
+        help='track downloaded documents in the persistent catalog'
+    )
+
+    parser.add_argument(
+        '--catalog-db',
+        type=str,
+        metavar='PATH',
+        help='path to catalog database file (default: ~/.fetcharoo/catalog.db)'
+    )
+
+    # Schema option
+    parser.add_argument(
+        '--schema',
+        type=str,
+        metavar='NAME',
+        help='use a site schema (use "auto" for auto-detection, or a schema name)'
+    )
+
     # Verbosity options
     parser.add_argument(
         '-q', '--quiet',
@@ -218,6 +269,262 @@ def create_parser() -> argparse.ArgumentParser:
     return parser
 
 
+# Keep backward compatibility alias
+def create_parser() -> argparse.ArgumentParser:
+    """Create the argument parser (alias for create_download_parser)."""
+    return create_download_parser()
+
+
+def _handle_diff(argv: list) -> int:
+    """Handle the 'diff' subcommand."""
+    from fetcharoo.catalog import DocumentCatalog
+    from fetcharoo.watcher import diff_once
+    from fetcharoo.notifications import has_changes
+
+    parser = argparse.ArgumentParser(prog='fetcharoo diff', description='Check for new/changed PDFs since last run')
+    parser.add_argument('url', type=str, help='URL to check for changes')
+    parser.add_argument('-d', '--depth', type=int, default=0, help='recursion depth')
+    parser.add_argument('--format', type=str, choices=['text', 'json'], default='text', help='output format')
+    parser.add_argument('--catalog-db', type=str, metavar='PATH', help='catalog database path')
+    parser.add_argument('--delay', type=float, default=0.5, help='request delay')
+    parser.add_argument('--timeout', type=int, default=30, help='request timeout')
+    parser.add_argument('--respect-robots', action='store_true', help='respect robots.txt')
+    parser.add_argument('--user-agent', type=str, help='custom user agent')
+    parser.add_argument('-q', '--quiet', action='count', default=0)
+    parser.add_argument('-v', '--verbose', action='count', default=0)
+
+    args = parser.parse_args(argv)
+    configure_logging(args.quiet, args.verbose)
+    if args.user_agent:
+        set_default_user_agent(args.user_agent)
+
+    catalog = DocumentCatalog(db_path=args.catalog_db)
+    try:
+        diff = diff_once(
+            url=args.url,
+            catalog=catalog,
+            recursion_depth=args.depth,
+            request_delay=args.delay,
+            timeout=args.timeout,
+            respect_robots=args.respect_robots,
+            user_agent=args.user_agent,
+            output_format=args.format,
+        )
+        return 0 if has_changes(diff) else 1
+    finally:
+        catalog.close()
+
+
+def _handle_watch(argv: list) -> int:
+    """Handle the 'watch' subcommand."""
+    from fetcharoo.catalog import DocumentCatalog
+    from fetcharoo.watcher import DocumentWatcher
+
+    parser = argparse.ArgumentParser(prog='fetcharoo watch', description='Continuously monitor a URL for new/changed PDFs')
+    parser.add_argument('url', type=str, help='URL to watch')
+    parser.add_argument('-d', '--depth', type=int, default=0, help='recursion depth')
+    parser.add_argument('--interval', type=float, default=3600, metavar='SECONDS', help='check interval (default: 3600)')
+    parser.add_argument('--notify', type=str, choices=['stdout', 'json', 'webhook', 'command'], default='stdout', help='notification method')
+    parser.add_argument('--webhook', type=str, metavar='URL', help='webhook URL')
+    parser.add_argument('--on-command', type=str, metavar='CMD', help='shell command on change')
+    parser.add_argument('--catalog-db', type=str, metavar='PATH', help='catalog database path')
+    parser.add_argument('--delay', type=float, default=0.5, help='request delay')
+    parser.add_argument('--timeout', type=int, default=30, help='request timeout')
+    parser.add_argument('--respect-robots', action='store_true', help='respect robots.txt')
+    parser.add_argument('--user-agent', type=str, help='custom user agent')
+    parser.add_argument('-q', '--quiet', action='count', default=0)
+    parser.add_argument('-v', '--verbose', action='count', default=0)
+
+    args = parser.parse_args(argv)
+    configure_logging(args.quiet, args.verbose)
+    if args.user_agent:
+        set_default_user_agent(args.user_agent)
+
+    catalog = DocumentCatalog(db_path=args.catalog_db)
+    try:
+        watcher = DocumentWatcher(
+            url=args.url,
+            catalog=catalog,
+            recursion_depth=args.depth,
+            request_delay=args.delay,
+            timeout=args.timeout,
+            respect_robots=args.respect_robots,
+            user_agent=args.user_agent,
+        )
+        watcher.watch(
+            interval=args.interval,
+            notify=args.notify,
+            webhook_url=args.webhook,
+            command=args.on_command,
+        )
+        return 0
+    finally:
+        catalog.close()
+
+
+def _handle_catalog(argv: list) -> int:
+    """Handle the 'catalog' subcommand."""
+    from fetcharoo.catalog import DocumentCatalog
+
+    if not argv:
+        print("Usage: fetcharoo catalog {show|export|search|runs|duplicates}")
+        return 1
+
+    action = argv[0]
+    rest = argv[1:]
+
+    parser = argparse.ArgumentParser(prog=f'fetcharoo catalog {action}')
+    parser.add_argument('--catalog-db', type=str, metavar='PATH', help='catalog database path')
+
+    if action == 'show':
+        parser.add_argument('--source', type=str, help='filter by source URL')
+        args = parser.parse_args(rest)
+        catalog = DocumentCatalog(db_path=args.catalog_db)
+        try:
+            docs = catalog.get_active_documents(source_page=args.source)
+            if not docs:
+                print("Catalog is empty.")
+                return 0
+            print(f"Tracked documents: {len(docs)}")
+            for doc in docs:
+                status_mark = {'active': ' ', 'removed': '-', 'changed': '~'}.get(doc.status, '?')
+                size = f"{doc.size_bytes:,} bytes" if doc.size_bytes else "unknown size"
+                print(f"  [{status_mark}] {doc.url}")
+                print(f"      {doc.filename or 'unnamed'} | {size} | last seen: {doc.last_seen or 'never'}")
+            return 0
+        finally:
+            catalog.close()
+
+    elif action == 'export':
+        parser.add_argument('--format', type=str, choices=['json', 'csv'], default='json', help='export format')
+        args = parser.parse_args(rest)
+        catalog = DocumentCatalog(db_path=args.catalog_db)
+        try:
+            if args.format == 'json':
+                print(catalog.export_json())
+            else:
+                print(catalog.export_csv())
+            return 0
+        finally:
+            catalog.close()
+
+    elif action == 'search':
+        parser.add_argument('query', type=str, help='search string')
+        args = parser.parse_args(rest)
+        catalog = DocumentCatalog(db_path=args.catalog_db)
+        try:
+            docs = catalog.search(args.query)
+            if not docs:
+                print(f"No documents matching '{args.query}'")
+                return 1
+            print(f"Found {len(docs)} document(s):")
+            for doc in docs:
+                print(f"  {doc.url} [{doc.status}]")
+            return 0
+        finally:
+            catalog.close()
+
+    elif action == 'runs':
+        parser.add_argument('--limit', type=int, default=20, help='max runs to show')
+        args = parser.parse_args(rest)
+        catalog = DocumentCatalog(db_path=args.catalog_db)
+        try:
+            runs = catalog.get_runs(limit=args.limit)
+            if not runs:
+                print("No runs recorded.")
+                return 0
+            print(f"Recent runs ({len(runs)}):")
+            for run in runs:
+                print(f"  {run.timestamp} | {run.url}")
+                print(f"    found={run.documents_found} new={run.documents_new} "
+                      f"changed={run.documents_changed} removed={run.documents_removed}")
+            return 0
+        finally:
+            catalog.close()
+
+    elif action == 'duplicates':
+        args = parser.parse_args(rest)
+        catalog = DocumentCatalog(db_path=args.catalog_db)
+        try:
+            dupes = catalog.find_duplicates()
+            if not dupes:
+                print("No duplicate documents found.")
+                return 0
+            print(f"Found {len(dupes)} group(s) of duplicates:")
+            for hash_val, docs in dupes.items():
+                print(f"\n  Content hash: {hash_val[:16]}...")
+                for doc in docs:
+                    print(f"    {doc.url}")
+            return 0
+        finally:
+            catalog.close()
+
+    else:
+        print(f"Unknown catalog action: {action}")
+        print("Usage: fetcharoo catalog {show|export|search|runs|duplicates}")
+        return 1
+
+
+def _handle_schemas(argv: list) -> int:
+    """Handle the 'schemas' subcommand."""
+    from fetcharoo.schemas import find_schema, list_schemas
+
+    if not argv:
+        print("Usage: fetcharoo schemas {list|match <url>}")
+        return 1
+
+    action = argv[0]
+
+    if action == 'list':
+        schemas = list_schemas()
+        if not schemas:
+            print("No schemas available.")
+            return 0
+        print(f"Available schemas ({len(schemas)}):")
+        for schema in schemas:
+            print(f"  {schema.name:20s} {schema.description or ''}")
+            print(f"  {'':20s} pattern: {schema.url_pattern}")
+            print(f"  {'':20s} depth: {schema.recommended_depth}, delay: {schema.request_delay}s")
+            print()
+        return 0
+
+    elif action == 'match':
+        if len(argv) < 2:
+            print("Usage: fetcharoo schemas match <url>")
+            return 1
+        url = argv[1]
+        schema = find_schema(url)
+        if schema:
+            print(f"Matched schema: {schema.name}")
+            print(f"  Description: {schema.description}")
+            print(f"  Recommended depth: {schema.recommended_depth}")
+            print(f"  Request delay: {schema.request_delay}s")
+            if schema.include_patterns:
+                print(f"  Include patterns: {schema.include_patterns}")
+            if schema.exclude_patterns:
+                print(f"  Exclude patterns: {schema.exclude_patterns}")
+        else:
+            print(f"No schema matches: {url}")
+            return 1
+        return 0
+
+    else:
+        print(f"Unknown schemas action: {action}")
+        print("Usage: fetcharoo schemas {list|match <url>}")
+        return 1
+
+
+def _handle_mcp(argv: list) -> int:
+    """Handle the 'mcp' subcommand."""
+    if not argv or argv[0] != 'serve':
+        print("Usage: fetcharoo mcp serve")
+        return 1
+
+    from fetcharoo.mcp_server import main as mcp_main
+    mcp_main()
+    return 0
+
+
 def main(argv: Optional[list] = None) -> int:
     """
     Main entry point for the CLI.
@@ -228,9 +535,38 @@ def main(argv: Optional[list] = None) -> int:
     Returns:
         Exit code (0 for success, 1 for failure).
     """
-    parser = create_parser()
+    if argv is None:
+        argv = sys.argv[1:]
+
+    # Route to subcommand if the first argument is a known subcommand
+    if argv and argv[0] in SUBCOMMANDS:
+        command = argv[0]
+        rest = argv[1:]
+        try:
+            if command == 'diff':
+                return _handle_diff(rest)
+            elif command == 'watch':
+                return _handle_watch(rest)
+            elif command == 'catalog':
+                return _handle_catalog(rest)
+            elif command == 'schemas':
+                return _handle_schemas(rest)
+            elif command == 'mcp':
+                return _handle_mcp(rest)
+        except KeyboardInterrupt:
+            print("\n\nOperation cancelled by user.")
+            return 1
+        except Exception as e:
+            print(f"\nError: {e}", file=sys.stderr)
+            return 1
+
+    # Default: download command
+    parser = create_download_parser()
+
+    if not argv:
+        parser.print_help()
+        sys.exit(2)
 
-    # Parse arguments
     args = parser.parse_args(argv)
 
     # Configure logging based on verbosity flags
@@ -240,6 +576,29 @@ def main(argv: Optional[list] = None) -> int:
     if args.user_agent:
         set_default_user_agent(args.user_agent)
 
+    # Auto-detect or apply schema
+    if args.schema:
+        from fetcharoo.schemas import find_schema, list_schemas
+        if args.schema == 'auto':
+            schema = find_schema(args.url)
+        else:
+            schemas = list_schemas()
+            schema = next((s for s in schemas if s.name == args.schema), None)
+
+        if schema:
+            print(f"Using schema: {schema.name}")
+            if args.depth == 0 and schema.recommended_depth > 0:
+                args.depth = schema.recommended_depth
+            if args.delay == 0.5 and schema.request_delay != 0.5:
+                args.delay = schema.request_delay
+            if args.sort_by is None and schema.sort_by:
+                args.sort_by = schema.sort_by
+            if not args.include and not args.exclude:
+                schema_filter = schema.get_filter_config()
+                if schema_filter:
+                    args.include = schema_filter.filename_include or None
+                    args.exclude = schema_filter.filename_exclude or None
+
     # Build filter config if any filtering options are provided
     filter_config = None
     if args.include or args.exclude or args.min_size or args.max_size:
@@ -286,13 +645,15 @@ def main(argv: Optional[list] = None) -> int:
         print(f"Output directory: {args.output}")
         print(f"Recursion depth: {args.depth}")
         print(f"Mode: {mode}")
+        if args.concurrent:
+            print(f"Concurrent: {args.max_workers} workers")
         if args.respect_robots:
             print("Respecting robots.txt rules")
         if filter_config:
             print("Filtering enabled")
         print()
 
-        success = download_pdfs_from_webpage(
+        result = download_pdfs_from_webpage(
             args.url,
             recursion_depth=args.depth,
             mode=mode,
@@ -305,10 +666,29 @@ def main(argv: Optional[list] = None) -> int:
             show_progress=args.progress,
             filter_config=filter_config,
             sort_by=args.sort_by,
-            output_name=args.output_name
+            output_name=args.output_name,
+            concurrent=args.concurrent,
+            max_workers=args.max_workers,
         )
 
-        if success:
+        # Optionally record in catalog
+        if args.catalog and result:
+            from fetcharoo.catalog import DocumentCatalog
+            catalog = DocumentCatalog(db_path=args.catalog_db)
+            try:
+                pdf_links = find_pdfs_from_webpage(
+                    args.url,
+                    recursion_depth=args.depth,
+                    request_delay=args.delay,
+                    timeout=args.timeout,
+                )
+                for link in pdf_links:
+                    catalog.record_discovery(link, source_page=args.url)
+                print(f"Recorded {len(pdf_links)} document(s) in catalog.")
+            finally:
+                catalog.close()
+
+        if result:
             print(f"\nSuccessfully downloaded PDFs to: {args.output}")
             return 0
         else:
diff --git a/fetcharoo/fetcharoo.py b/fetcharoo/fetcharoo.py
index be6d1a8..057edba 100644
--- a/fetcharoo/fetcharoo.py
+++ b/fetcharoo/fetcharoo.py
@@ -12,6 +12,7 @@
 from typing import List, Set, Optional, Union, Dict, Callable
 
 from fetcharoo.downloader import download_pdf
+from fetcharoo.async_downloader import download_pdfs_concurrent
 from fetcharoo.pdf_utils import merge_pdfs, save_pdf_to_file
 from fetcharoo.filtering import FilterConfig, should_download_pdf
 
@@ -433,7 +434,9 @@ def process_pdfs(
     filter_config: Optional[FilterConfig] = None,
     sort_by: Optional[str] = None,
     sort_key: Optional[Callable[[str], any]] = None,
-    output_name: Optional[str] = None
+    output_name: Optional[str] = None,
+    concurrent: bool = False,
+    max_workers: int = 5
 ) -> ProcessResult:
     """
     Download and process each PDF file based on the specified mode ('separate' or 'merge').
@@ -455,6 +458,8 @@ def process_pdfs(
                   Takes precedence over sort_by if both are provided.
         output_name: Custom filename for merged PDF output. Only used in 'merge' mode.
                     Defaults to 'merged.pdf' if not specified.
+        concurrent: If True, download PDFs in parallel using a thread pool. Defaults to False.
+        max_workers: Maximum number of concurrent download threads. Defaults to 5.
 
     Returns:
         ProcessResult with detailed information about the operation.
@@ -507,15 +512,32 @@ def process_pdfs(
     if user_agent is None:
         user_agent = get_default_user_agent()
 
-    # Download PDF contents with optional progress bar
-    if show_progress:
-        pdf_contents = [download_pdf(pdf_link, timeout, user_agent=user_agent) for pdf_link in tqdm(pdf_links, desc="Downloading PDFs")]
+    # Download PDF contents
+    if concurrent and len(pdf_links) > 1:
+        # Parallel downloads using thread pool
+        progress_bar = tqdm(total=len(pdf_links), desc="Downloading PDFs") if show_progress else None
+        callback = (lambda: progress_bar.update(1)) if progress_bar else None
+        download_results = download_pdfs_concurrent(
+            pdf_links,
+            max_workers=max_workers,
+            timeout=timeout,
+            user_agent=user_agent,
+            progress_callback=callback,
+        )
+        if progress_bar:
+            progress_bar.close()
+        pdf_contents_with_links = download_results
     else:
-        pdf_contents = [download_pdf(pdf_link, timeout, user_agent=user_agent) for pdf_link in pdf_links]
+        # Sequential downloads (original behavior)
+        if show_progress:
+            pdf_contents = [download_pdf(pdf_link, timeout, user_agent=user_agent) for pdf_link in tqdm(pdf_links, desc="Downloading PDFs")]
+        else:
+            pdf_contents = [download_pdf(pdf_link, timeout, user_agent=user_agent) for pdf_link in pdf_links]
+        pdf_contents_with_links = list(zip(pdf_contents, pdf_links))
 
     # Separate valid and failed downloads
     pdf_contents_valid = []
-    for content, link in zip(pdf_contents, pdf_links):
+    for content, link in pdf_contents_with_links:
         if content is not None and content.startswith(b'%PDF'):
             pdf_contents_valid.append((content, link))
         else:
@@ -604,7 +626,9 @@ def download_pdfs_from_webpage(
     filter_config: Optional[FilterConfig] = None,
     sort_by: Optional[str] = None,
     sort_key: Optional[Callable[[str], any]] = None,
-    output_name: Optional[str] = None
+    output_name: Optional[str] = None,
+    concurrent: bool = False,
+    max_workers: int = 5
 ) -> Union[ProcessResult, Dict[str, Union[List[str], int]]]:
     """
     Download PDFs from a webpage and process them based on the specified mode.
@@ -629,6 +653,8 @@ def download_pdfs_from_webpage(
                   Takes precedence over sort_by if both are provided.
         output_name: Custom filename for merged PDF output. Only used in 'merge' mode.
                     Defaults to 'merged.pdf' if not specified.
+        concurrent: If True, download PDFs in parallel. Defaults to False.
+        max_workers: Maximum concurrent download threads. Defaults to 5.
 
     Returns:
         If dry_run=True: A dict with {"urls": [...], "count": N}
@@ -678,5 +704,7 @@ def download_pdfs_from_webpage(
         filter_config=filter_config,
         sort_by=sort_by,
         sort_key=sort_key,
-        output_name=output_name
+        output_name=output_name,
+        concurrent=concurrent,
+        max_workers=max_workers,
     )
diff --git a/fetcharoo/mcp_server.py b/fetcharoo/mcp_server.py
new file mode 100644
index 0000000..bc507b9
--- /dev/null
+++ b/fetcharoo/mcp_server.py
@@ -0,0 +1,303 @@
+"""
+MCP (Model Context Protocol) server for fetcharoo.
+
+Exposes fetcharoo's stateful capabilities as MCP tools, enabling AI agents
+to discover, download, and track PDF documents persistently.
+
+Usage:
+    fetcharoo mcp serve
+    # or directly:
+    python -m fetcharoo.mcp_server
+"""
+
+import json
+import logging
+import os
+import sys
+from typing import Optional
+
+logger = logging.getLogger('fetcharoo')
+
+
+def _check_mcp_available():
+    """Check if the MCP/FastMCP library is installed."""
+    try:
+        from mcp.server.fastmcp import FastMCP
+        return True
+    except ImportError:
+        return False
+
+
+def create_server():
+    """
+    Create and configure the fetcharoo MCP server.
+
+    Returns:
+        A configured FastMCP server instance.
+
+    Raises:
+        ImportError: If the mcp package is not installed.
+    """
+    try:
+        from mcp.server.fastmcp import FastMCP
+    except ImportError:
+        raise ImportError(
+            "MCP support requires the 'mcp' package. "
+            "Install it with: pip install 'fetcharoo[mcp]' or pip install mcp"
+        )
+
+    from fetcharoo.catalog import DocumentCatalog, DiffResult
+    from fetcharoo.fetcharoo import find_pdfs_from_webpage, download_pdfs_from_webpage
+    from fetcharoo.filtering import FilterConfig
+    from fetcharoo.watcher import diff_once
+
+    mcp = FastMCP(
+        "fetcharoo",
+        description="PDF document discovery, download, and tracking from websites",
+    )
+
+    # Shared catalog instance
+    _catalog = DocumentCatalog()
+
+    @mcp.tool()
+    def discover_pdfs(
+        url: str,
+        recursion_depth: int = 0,
+        include_patterns: Optional[list] = None,
+        exclude_patterns: Optional[list] = None,
+    ) -> str:
+        """
+        Discover all PDF documents available on a webpage.
+
+        Crawls the given URL (optionally following links to the specified depth)
+        and returns a structured list of all PDF URLs found.
+
+        Args:
+            url: The webpage URL to search for PDFs.
+            recursion_depth: How many levels of links to follow (0-5).
+            include_patterns: Filename patterns to include (e.g., ['report*.pdf']).
+            exclude_patterns: Filename patterns to exclude (e.g., ['*draft*']).
+        """
+        pdf_urls = find_pdfs_from_webpage(
+            url,
+            recursion_depth=min(recursion_depth, 5),
+        )
+
+        # Apply filtering if patterns provided
+        if include_patterns or exclude_patterns:
+            from fetcharoo.filtering import should_download_pdf
+            config = FilterConfig(
+                filename_include=include_patterns or [],
+                filename_exclude=exclude_patterns or [],
+            )
+            pdf_urls = [u for u in pdf_urls if should_download_pdf(u, filter_config=config)]
+
+        # Record discoveries in catalog
+        for pdf_url in pdf_urls:
+            _catalog.record_discovery(pdf_url, source_page=url)
+
+        return json.dumps({
+            "source_url": url,
+            "count": len(pdf_urls),
+            "pdfs": pdf_urls,
+        }, indent=2)
+
+    @mcp.tool()
+    def download_pdfs(
+        url: str,
+        output_dir: str = "output",
+        recursion_depth: int = 0,
+        merge: bool = False,
+        output_name: Optional[str] = None,
+    ) -> str:
+        """
+        Download PDF documents from a webpage with fetcharoo's full reliability
+        (retry logic, rate limiting, deduplication, security hardening).
+
+        Args:
+            url: The webpage URL to download PDFs from.
+            output_dir: Directory to save downloaded PDFs.
+            recursion_depth: How many levels of links to follow (0-5).
+            merge: If True, merge all PDFs into a single file.
+            output_name: Custom filename for merged output.
+        """
+        result = download_pdfs_from_webpage(
+            url,
+            recursion_depth=min(recursion_depth, 5),
+            mode='merge' if merge else 'separate',
+            write_dir=output_dir,
+            output_name=output_name,
+        )
+
+        return json.dumps({
+            "success": result.success,
+            "downloaded_count": result.downloaded_count,
+            "filtered_count": result.filtered_count,
+            "failed_count": result.failed_count,
+            "files_created": result.files_created,
+            "errors": result.errors,
+        }, indent=2)
+
+    @mcp.tool()
+    def catalog_query(
+        source_url: Optional[str] = None,
+    ) -> str:
+        """
+        Query the persistent document catalog.
+
+        Shows all documents fetcharoo has ever seen, with metadata including
+        when they were first/last seen, content hashes, and file sizes.
+        This is persistent memory across sessions.
+
+        Args:
+            source_url: If provided, only show documents from this source page.
+        """
+        docs = _catalog.get_active_documents(source_page=source_url)
+        return json.dumps({
+            "total_documents": len(docs),
+            "documents": [
+                {
+                    "url": d.url,
+                    "filename": d.filename,
+                    "size_bytes": d.size_bytes,
+                    "first_seen": d.first_seen,
+                    "last_seen": d.last_seen,
+                    "last_changed": d.last_changed,
+                    "status": d.status,
+                    "metadata": d.metadata,
+                }
+                for d in docs
+            ],
+        }, indent=2)
+
+    @mcp.tool()
+    def catalog_diff(
+        url: str,
+        recursion_depth: int = 0,
+    ) -> str:
+        """
+        Check what's changed since the last time fetcharoo looked at a URL.
+
+        Compares the current state of PDFs on a webpage against what's stored
+        in the catalog. Reports new, removed, and unchanged documents.
+
+        Args:
+            url: The webpage URL to check for changes.
+            recursion_depth: How many levels of links to follow (0-5).
+        """
+        current_urls = find_pdfs_from_webpage(
+            url,
+            recursion_depth=min(recursion_depth, 5),
+        )
+
+        diff = _catalog.diff(current_urls)
+
+        # Update catalog
+        for doc in diff.new:
+            _catalog.record_discovery(doc.url, source_page=url)
+        for doc in diff.removed:
+            _catalog.mark_removed(doc.url)
+        _catalog.record_run(url, diff)
+
+        return json.dumps({
+            "source_url": url,
+            "summary": {
+                "new": len(diff.new),
+                "changed": len(diff.changed),
+                "removed": len(diff.removed),
+                "unchanged": len(diff.unchanged),
+            },
+            "new_documents": [d.url for d in diff.new],
+            "removed_documents": [d.url for d in diff.removed],
+            "unchanged_documents": [d.url for d in diff.unchanged],
+        }, indent=2)
+
+    @mcp.tool()
+    def catalog_search(
+        query: str,
+    ) -> str:
+        """
+        Search across all tracked documents by URL or filename substring.
+
+        Args:
+            query: Search string to match against document URLs and filenames.
+        """
+        docs = _catalog.search(query)
+        return json.dumps({
+            "query": query,
+            "results_count": len(docs),
+            "results": [
+                {
+                    "url": d.url,
+                    "filename": d.filename,
+                    "status": d.status,
+                    "first_seen": d.first_seen,
+                    "last_seen": d.last_seen,
+                }
+                for d in docs
+            ],
+        }, indent=2)
+
+    @mcp.tool()
+    def get_document_metadata(
+        url: str,
+    ) -> str:
+        """
+        Get detailed information about a specific tracked document.
+
+        Args:
+            url: The URL of the document to look up.
+        """
+        doc = _catalog.get_document(url)
+        if doc is None:
+            return json.dumps({"error": f"Document not found: {url}"})
+
+        return json.dumps({
+            "url": doc.url,
+            "filename": doc.filename,
+            "content_hash": doc.content_hash,
+            "size_bytes": doc.size_bytes,
+            "first_seen": doc.first_seen,
+            "last_seen": doc.last_seen,
+            "last_changed": doc.last_changed,
+            "status": doc.status,
+            "source_page": doc.source_page,
+            "metadata": doc.metadata,
+        }, indent=2)
+
+    @mcp.tool()
+    def find_duplicate_documents() -> str:
+        """
+        Find documents that have identical content but different URLs.
+
+        Uses content hashing to detect when the same PDF exists at multiple URLs.
+        """
+        duplicates = _catalog.find_duplicates()
+        result = {}
+        for hash_val, docs in duplicates.items():
+            result[hash_val] = [d.url for d in docs]
+
+        return json.dumps({
+            "duplicate_groups": len(result),
+            "duplicates": result,
+        }, indent=2)
+
+    return mcp
+
+
+def main():
+    """Run the fetcharoo MCP server."""
+    if not _check_mcp_available():
+        print(
+            "Error: MCP support requires the 'mcp' package.\n"
+            "Install it with: pip install 'fetcharoo[mcp]' or pip install mcp",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    server = create_server()
+    server.run()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fetcharoo/notifications.py b/fetcharoo/notifications.py
new file mode 100644
index 0000000..fa7ff6a
--- /dev/null
+++ b/fetcharoo/notifications.py
@@ -0,0 +1,166 @@
+"""
+Notification handlers for fetcharoo watch mode.
+
+Supports multiple notification channels: stdout, JSON, webhook, and command execution.
+"""
+
+import json
+import logging
+import os
+import subprocess
+from typing import List, Optional
+
+import requests
+
+from fetcharoo.catalog import DiffResult, DocumentRecord
+
+logger = logging.getLogger('fetcharoo')
+
+
+def format_diff_text(diff: DiffResult, url: str) -> str:
+    """
+    Format a DiffResult as human-readable text with git-like prefixes.
+
+    Args:
+        diff: The DiffResult to format.
+        url: The source URL that was checked.
+
+    Returns:
+        Formatted text string.
+    """
+    lines = []
+    lines.append(f"Changes detected for: {url}")
+    lines.append(f"  New: {len(diff.new)}  Changed: {len(diff.changed)}  Removed: {len(diff.removed)}  Unchanged: {len(diff.unchanged)}")
+    lines.append("")
+
+    if diff.new:
+        for doc in diff.new:
+            lines.append(f"  + {doc.url}")
+    if diff.changed:
+        for doc in diff.changed:
+            lines.append(f"  ~ {doc.url}")
+    if diff.removed:
+        for doc in diff.removed:
+            lines.append(f"  - {doc.url}")
+
+    if not diff.new and not diff.changed and not diff.removed:
+        lines.append("  No changes detected.")
+
+    return "\n".join(lines)
+
+
+def format_diff_json(diff: DiffResult, url: str) -> str:
+    """
+    Format a DiffResult as a JSON string.
+
+    Args:
+        diff: The DiffResult to format.
+        url: The source URL that was checked.
+
+    Returns:
+        JSON string.
+    """
+    data = {
+        "source_url": url,
+        "summary": {
+            "new": len(diff.new),
+            "changed": len(diff.changed),
+            "removed": len(diff.removed),
+            "unchanged": len(diff.unchanged),
+        },
+        "new": [doc.url for doc in diff.new],
+        "changed": [doc.url for doc in diff.changed],
+        "removed": [doc.url for doc in diff.removed],
+    }
+    return json.dumps(data, indent=2)
+
+
+def notify_stdout(diff: DiffResult, url: str) -> None:
+    """Print diff to stdout in human-readable format."""
+    print(format_diff_text(diff, url))
+
+
+def notify_json(diff: DiffResult, url: str) -> None:
+    """Print diff to stdout as JSON."""
+    print(format_diff_json(diff, url))
+
+
+def notify_webhook(diff: DiffResult, url: str, webhook_url: str) -> bool:
+    """
+    POST diff as JSON to a webhook URL.
+
+    Args:
+        diff: The DiffResult to send.
+        url: The source URL that was checked.
+        webhook_url: The webhook URL to POST to.
+
+    Returns:
+        True if the webhook responded successfully.
+    """
+    payload = json.loads(format_diff_json(diff, url))
+    try:
+        response = requests.post(
+            webhook_url,
+            json=payload,
+            headers={'Content-Type': 'application/json'},
+            timeout=30,
+        )
+        response.raise_for_status()
+        logger.info(f"Webhook notification sent to {webhook_url}")
+        return True
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Webhook notification failed: {e}")
+        return False
+
+
+def notify_command(diff: DiffResult, url: str, command: str) -> int:
+    """
+    Run a shell command with change info as environment variables.
+
+    Environment variables set:
+        FETCHAROO_URL: The source URL
+        FETCHAROO_NEW_COUNT: Number of new documents
+        FETCHAROO_CHANGED_COUNT: Number of changed documents
+        FETCHAROO_REMOVED_COUNT: Number of removed documents
+        FETCHAROO_NEW_URLS: Newline-separated list of new URLs
+        FETCHAROO_CHANGED_URLS: Newline-separated list of changed URLs
+        FETCHAROO_REMOVED_URLS: Newline-separated list of removed URLs
+
+    Args:
+        diff: The DiffResult.
+        url: The source URL.
+        command: Shell command to execute.
+
+    Returns:
+        The command's exit code.
+    """
+    env = os.environ.copy()
+    env['FETCHAROO_URL'] = url
+    env['FETCHAROO_NEW_COUNT'] = str(len(diff.new))
+    env['FETCHAROO_CHANGED_COUNT'] = str(len(diff.changed))
+    env['FETCHAROO_REMOVED_COUNT'] = str(len(diff.removed))
+    env['FETCHAROO_NEW_URLS'] = "\n".join(doc.url for doc in diff.new)
+    env['FETCHAROO_CHANGED_URLS'] = "\n".join(doc.url for doc in diff.changed)
+    env['FETCHAROO_REMOVED_URLS'] = "\n".join(doc.url for doc in diff.removed)
+
+    try:
+        result = subprocess.run(
+            command, shell=True, env=env, timeout=120,
+            capture_output=True, text=True
+        )
+        if result.stdout:
+            logger.info(f"Command output: {result.stdout.strip()}")
+        if result.stderr:
+            logger.warning(f"Command stderr: {result.stderr.strip()}")
+        return result.returncode
+    except subprocess.TimeoutExpired:
+        logger.error(f"Command timed out: {command}")
+        return -1
+    except Exception as e:
+        logger.error(f"Command failed: {e}")
+        return -1
+
+
+def has_changes(diff: DiffResult) -> bool:
+    """Check if a DiffResult contains any changes."""
+    return bool(diff.new or diff.changed or diff.removed)
diff --git a/fetcharoo/schemas/__init__.py b/fetcharoo/schemas/__init__.py
index f34042d..76b0f3c 100644
--- a/fetcharoo/schemas/__init__.py
+++ b/fetcharoo/schemas/__init__.py
@@ -6,7 +6,7 @@
 for downloading PDFs from different websites.
 
 Example:
-    >>> from fetcharoo.schemas import SiteSchema
+    >>> from fetcharoo.schemas import SiteSchema, find_schema
     >>> schema = SiteSchema(
     ...     name='my_site',
     ...     url_pattern=r'https://mysite\\.com/.*',
@@ -16,8 +16,41 @@
     True
 """
 
+from typing import List, Optional
+
 from fetcharoo.schemas.base import SiteSchema
 
+
+def find_schema(url: str) -> Optional[SiteSchema]:
+    """
+    Find a built-in schema that matches the given URL.
+
+    Args:
+        url: The URL to match against registered schemas.
+
+    Returns:
+        The first matching SiteSchema, or None if no match found.
+    """
+    from fetcharoo.schemas.sites import BUILTIN_SCHEMAS
+    for schema in BUILTIN_SCHEMAS:
+        if schema.matches(url):
+            return schema
+    return None
+
+
+def list_schemas() -> List[SiteSchema]:
+    """
+    List all available built-in schemas.
+
+    Returns:
+        List of all registered SiteSchema instances.
+    """
+    from fetcharoo.schemas.sites import BUILTIN_SCHEMAS
+    return list(BUILTIN_SCHEMAS)
+
+
 __all__ = [
     "SiteSchema",
+    "find_schema",
+    "list_schemas",
 ]
diff --git a/fetcharoo/schemas/sites/__init__.py b/fetcharoo/schemas/sites/__init__.py
new file mode 100644
index 0000000..ed83419
--- /dev/null
+++ b/fetcharoo/schemas/sites/__init__.py
@@ -0,0 +1,29 @@
+"""
+Community site schemas for fetcharoo.
+
+Pre-built download configurations for common document repositories.
+"""
+
+from fetcharoo.schemas.sites.arxiv import ARXIV_SCHEMA
+from fetcharoo.schemas.sites.ietf_rfc import IETF_RFC_SCHEMA
+from fetcharoo.schemas.sites.sec_edgar import SEC_EDGAR_SCHEMA
+from fetcharoo.schemas.sites.w3c import W3C_SCHEMA
+from fetcharoo.schemas.sites.federal_register import FEDERAL_REGISTER_SCHEMA
+
+# Registry of all built-in schemas
+BUILTIN_SCHEMAS = [
+    ARXIV_SCHEMA,
+    IETF_RFC_SCHEMA,
+    SEC_EDGAR_SCHEMA,
+    W3C_SCHEMA,
+    FEDERAL_REGISTER_SCHEMA,
+]
+
+__all__ = [
+    "ARXIV_SCHEMA",
+    "IETF_RFC_SCHEMA",
+    "SEC_EDGAR_SCHEMA",
+    "W3C_SCHEMA",
+    "FEDERAL_REGISTER_SCHEMA",
+    "BUILTIN_SCHEMAS",
+]
diff --git a/fetcharoo/schemas/sites/arxiv.py b/fetcharoo/schemas/sites/arxiv.py
new file mode 100644
index 0000000..1b5d6a5
--- /dev/null
+++ b/fetcharoo/schemas/sites/arxiv.py
@@ -0,0 +1,20 @@
+"""Schema for downloading papers from arXiv."""
+
+from fetcharoo.schemas.base import SiteSchema
+
+ARXIV_SCHEMA = SiteSchema(
+    name="arxiv",
+    url_pattern=r"https?://arxiv\.org/(abs|pdf|html)/\d+\.\d+",
+    description="arXiv preprint server — academic papers and preprints",
+    include_patterns=[],
+    exclude_patterns=[],
+    url_include_patterns=["*arxiv.org/pdf/*"],
+    url_exclude_patterns=[],
+    sort_by="alpha",
+    default_output_name="arxiv_papers.pdf",
+    recommended_depth=0,
+    request_delay=1.0,  # arXiv rate-limits aggressively
+    test_url="https://arxiv.org/abs/2301.00001",
+    expected_min_pdfs=1,
+    version="1.0.0",
+)
diff --git a/fetcharoo/schemas/sites/federal_register.py b/fetcharoo/schemas/sites/federal_register.py
new file mode 100644
index 0000000..f0b95fa
--- /dev/null
+++ b/fetcharoo/schemas/sites/federal_register.py
@@ -0,0 +1,18 @@
+"""Schema for downloading Federal Register documents."""
+
+from fetcharoo.schemas.base import SiteSchema
+
+FEDERAL_REGISTER_SCHEMA = SiteSchema(
+    name="federal_register",
+    url_pattern=r"https?://(www\.)?federalregister\.gov/(documents|articles)/.*",
+    description="Federal Register — U.S. government rules, proposed rules, and notices",
+    include_patterns=[],
+    exclude_patterns=[],
+    url_include_patterns=["*federalregister.gov/*"],
+    url_exclude_patterns=["*/comments/*", "*/docket*"],
+    sort_by="alpha",
+    default_output_name="federal_register.pdf",
+    recommended_depth=1,
+    request_delay=1.0,  # Be respectful to government servers
+    version="1.0.0",
+)
diff --git a/fetcharoo/schemas/sites/ietf_rfc.py b/fetcharoo/schemas/sites/ietf_rfc.py
new file mode 100644
index 0000000..39a9035
--- /dev/null
+++ b/fetcharoo/schemas/sites/ietf_rfc.py
@@ -0,0 +1,20 @@
+"""Schema for downloading IETF RFCs."""
+
+from fetcharoo.schemas.base import SiteSchema
+
+IETF_RFC_SCHEMA = SiteSchema(
+    name="ietf_rfc",
+    url_pattern=r"https?://(www\.)?rfc-editor\.org/.*|https?://datatracker\.ietf\.org/doc/.*",
+    description="IETF RFC Editor — Internet standards and specifications",
+    include_patterns=["rfc*.pdf"],
+    exclude_patterns=["*draft*"],
+    url_include_patterns=[],
+    url_exclude_patterns=["*/obsoleted-by/*"],
+    sort_by="numeric",
+    default_output_name="rfc_collection.pdf",
+    recommended_depth=1,
+    request_delay=0.5,
+    test_url="https://www.rfc-editor.org/rfc/rfc9110",
+    expected_min_pdfs=1,
+    version="1.0.0",
+)
diff --git a/fetcharoo/schemas/sites/sec_edgar.py b/fetcharoo/schemas/sites/sec_edgar.py
new file mode 100644
index 0000000..e98d46a
--- /dev/null
+++ b/fetcharoo/schemas/sites/sec_edgar.py
@@ -0,0 +1,20 @@
+"""Schema for downloading SEC EDGAR filings."""
+
+from fetcharoo.schemas.base import SiteSchema
+
+SEC_EDGAR_SCHEMA = SiteSchema(
+    name="sec_edgar",
+    url_pattern=r"https?://(www\.)?sec\.gov/cgi-bin/browse-edgar.*|https?://efts\.sec\.gov/.*|https?://(www\.)?sec\.gov/Archives/.*",
+    description="SEC EDGAR — Company filings (10-K, 10-Q, 8-K, etc.)",
+    include_patterns=[],
+    exclude_patterns=["*R9999*", "*ex21*"],
+    url_include_patterns=["*Archives/edgar/data/*"],
+    url_exclude_patterns=["*/index.*"],
+    sort_by="alpha",
+    default_output_name="sec_filing.pdf",
+    recommended_depth=2,
+    request_delay=0.5,  # SEC asks for <= 10 requests/second
+    test_url="https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=apple",
+    expected_min_pdfs=1,
+    version="1.0.0",
+)
diff --git a/fetcharoo/schemas/sites/w3c.py b/fetcharoo/schemas/sites/w3c.py
new file mode 100644
index 0000000..e3d20bd
--- /dev/null
+++ b/fetcharoo/schemas/sites/w3c.py
@@ -0,0 +1,18 @@
+"""Schema for downloading W3C specifications."""
+
+from fetcharoo.schemas.base import SiteSchema
+
+W3C_SCHEMA = SiteSchema(
+    name="w3c",
+    url_pattern=r"https?://(www\.)?w3\.org/(TR|standards)/.*",
+    description="W3C — Web standards and technical reports",
+    include_patterns=[],
+    exclude_patterns=["*diff*", "*review*"],
+    url_include_patterns=["*w3.org/TR/*"],
+    url_exclude_patterns=["*/WD-*"],  # Exclude working drafts by default
+    sort_by="alpha",
+    default_output_name="w3c_specs.pdf",
+    recommended_depth=1,
+    request_delay=0.5,
+    version="1.0.0",
+)
diff --git a/fetcharoo/watcher.py b/fetcharoo/watcher.py
new file mode 100644
index 0000000..e2e72fb
--- /dev/null
+++ b/fetcharoo/watcher.py
@@ -0,0 +1,202 @@
+"""
+Document change monitoring for fetcharoo.
+
+Provides watch mode to detect new, changed, and removed PDFs on a website.
+Supports one-shot diff (cron-friendly) and continuous watch modes.
+"""
+
+import logging
+import signal
+import time
+from typing import List, Optional
+
+from fetcharoo.catalog import DiffResult, DocumentCatalog
+from fetcharoo.fetcharoo import find_pdfs_from_webpage
+from fetcharoo.notifications import (
+    format_diff_json,
+    format_diff_text,
+    has_changes,
+    notify_command,
+    notify_json,
+    notify_stdout,
+    notify_webhook,
+)
+
+logger = logging.getLogger('fetcharoo')
+
+
+class DocumentWatcher:
+    """
+    Watches a URL for document changes over time.
+
+    Uses a DocumentCatalog for persistent state and supports
+    multiple notification methods.
+    """
+
+    def __init__(
+        self,
+        url: str,
+        catalog: DocumentCatalog,
+        recursion_depth: int = 0,
+        request_delay: float = 0.5,
+        timeout: int = 30,
+        respect_robots: bool = False,
+        user_agent: Optional[str] = None,
+    ):
+        self.url = url
+        self.catalog = catalog
+        self.recursion_depth = recursion_depth
+        self.request_delay = request_delay
+        self.timeout = timeout
+        self.respect_robots = respect_robots
+        self.user_agent = user_agent
+        self._stop = False
+
+    def check_once(self) -> DiffResult:
+        """
+        Perform a single check: crawl the URL, compare against catalog.
+
+        Returns:
+            DiffResult with new, changed, removed, unchanged documents.
+        """
+        # Discover current PDFs
+        current_urls = find_pdfs_from_webpage(
+            self.url,
+            recursion_depth=self.recursion_depth,
+            request_delay=self.request_delay,
+            timeout=self.timeout,
+            respect_robots=self.respect_robots,
+            user_agent=self.user_agent,
+        )
+
+        # Compare against catalog
+        diff = self.catalog.diff(current_urls)
+
+        # Update catalog with new discoveries
+        for doc in diff.new:
+            self.catalog.record_discovery(
+                doc.url, source_page=self.url
+            )
+
+        # Mark removed documents
+        for doc in diff.removed:
+            self.catalog.mark_removed(doc.url)
+
+        # Record the run
+        self.catalog.record_run(self.url, diff)
+
+        return diff
+
+    def watch(
+        self,
+        interval: float = 3600,
+        notify: str = 'stdout',
+        webhook_url: Optional[str] = None,
+        command: Optional[str] = None,
+        on_change_only: bool = True,
+    ) -> None:
+        """
+        Continuously watch for changes at a regular interval.
+
+        Args:
+            interval: Seconds between checks.
+            notify: Notification method ('stdout', 'json', 'webhook', 'command').
+            webhook_url: URL for webhook notifications.
+            command: Shell command for command notifications.
+            on_change_only: If True, only notify when changes are detected.
+        """
+        # Handle graceful shutdown
+        def _signal_handler(signum, frame):
+            self._stop = True
+
+        signal.signal(signal.SIGINT, _signal_handler)
+        signal.signal(signal.SIGTERM, _signal_handler)
+
+        logger.info(f"Watching {self.url} every {interval}s (Ctrl+C to stop)")
+        print(f"Watching {self.url} every {interval:.0f}s (Ctrl+C to stop)")
+
+        while not self._stop:
+            try:
+                diff = self.check_once()
+
+                if not on_change_only or has_changes(diff):
+                    self._notify(diff, notify, webhook_url, command)
+
+                if not self._stop:
+                    time.sleep(interval)
+
+            except KeyboardInterrupt:
+                break
+            except Exception as e:
+                logger.error(f"Watch error: {e}")
+                if not self._stop:
+                    time.sleep(interval)
+
+        print("\nWatch stopped.")
+
+    def _notify(
+        self,
+        diff: DiffResult,
+        method: str,
+        webhook_url: Optional[str] = None,
+        command: Optional[str] = None,
+    ) -> None:
+        """Dispatch notification to the appropriate handler."""
+        if method == 'stdout':
+            notify_stdout(diff, self.url)
+        elif method == 'json':
+            notify_json(diff, self.url)
+        elif method == 'webhook' and webhook_url:
+            notify_webhook(diff, self.url, webhook_url)
+        elif method == 'command' and command:
+            notify_command(diff, self.url, command)
+        else:
+            notify_stdout(diff, self.url)
+
+
+def diff_once(
+    url: str,
+    catalog: DocumentCatalog,
+    recursion_depth: int = 0,
+    request_delay: float = 0.5,
+    timeout: int = 30,
+    respect_robots: bool = False,
+    user_agent: Optional[str] = None,
+    output_format: str = 'text',
+) -> DiffResult:
+    """
+    One-shot diff: compare current state against catalog and print results.
+
+    Designed for cron jobs. Returns appropriate exit-code-friendly result.
+
+    Args:
+        url: The URL to check.
+        catalog: The DocumentCatalog instance.
+        recursion_depth: Crawl depth.
+        request_delay: Delay between requests.
+        timeout: Request timeout.
+        respect_robots: Whether to respect robots.txt.
+        user_agent: Custom user agent.
+        output_format: 'text' or 'json'.
+
+    Returns:
+        The DiffResult.
+    """
+    watcher = DocumentWatcher(
+        url=url,
+        catalog=catalog,
+        recursion_depth=recursion_depth,
+        request_delay=request_delay,
+        timeout=timeout,
+        respect_robots=respect_robots,
+        user_agent=user_agent,
+    )
+
+    diff = watcher.check_once()
+
+    if output_format == 'json':
+        print(format_diff_json(diff, url))
+    else:
+        print(format_diff_text(diff, url))
+
+    return diff
diff --git a/pyproject.toml b/pyproject.toml
index 69ddfca..e7b26c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "fetcharoo"
-version = "0.2.0"
-description = "A Python library for downloading PDF files from webpages, with support for recursive link following and PDF merging."
+version = "0.3.0"
+description = "A Python library for downloading PDF files from webpages, with support for recursive link following, PDF merging, concurrent downloads, persistent document tracking, and change monitoring."
 authors = ["Mark A. Lifson, Ph.D. <mlifson@gmail.com>"]
 license = "MIT"
 readme = "README.md"
diff --git a/tests/test_async_downloader.py b/tests/test_async_downloader.py
new file mode 100644
index 0000000..19af0b0
--- /dev/null
+++ b/tests/test_async_downloader.py
@@ -0,0 +1,85 @@
+"""Tests for concurrent PDF downloading."""
+
+import unittest
+from unittest.mock import patch, MagicMock
+
+from fetcharoo.async_downloader import download_pdfs_concurrent, RateLimiter
+
+
+class TestRateLimiter(unittest.TestCase):
+    """Test the thread-safe rate limiter."""
+
+    def test_rate_limiter_creates_with_interval(self):
+        limiter = RateLimiter(min_interval=1.0)
+        self.assertEqual(limiter._min_interval, 1.0)
+
+    def test_rate_limiter_wait_does_not_error(self):
+        limiter = RateLimiter(min_interval=0.0)
+        limiter.wait()  # Should not raise
+
+
+class TestDownloadPdfsConcurrent(unittest.TestCase):
+    """Test concurrent download functionality."""
+
+    def test_empty_list_returns_empty(self):
+        result = download_pdfs_concurrent([])
+        self.assertEqual(result, [])
+
+    @patch('fetcharoo.async_downloader.download_pdf')
+    def test_downloads_all_urls(self, mock_download):
+        mock_download.return_value = b'%PDF-1.4 fake content'
+        urls = [
+            'https://example.com/a.pdf',
+            'https://example.com/b.pdf',
+            'https://example.com/c.pdf',
+        ]
+        results = download_pdfs_concurrent(urls, max_workers=3, request_delay=0.0)
+
+        self.assertEqual(len(results), 3)
+        for content, url in results:
+            self.assertEqual(content, b'%PDF-1.4 fake content')
+            self.assertIn(url, urls)
+
+    @patch('fetcharoo.async_downloader.download_pdf')
+    def test_preserves_order(self, mock_download):
+        def side_effect(url, **kwargs):
+            return url.encode()
+
+        mock_download.side_effect = side_effect
+        urls = ['https://example.com/1.pdf', 'https://example.com/2.pdf']
+        results = download_pdfs_concurrent(urls, max_workers=2, request_delay=0.0)
+
+        self.assertEqual(results[0][1], urls[0])
+        self.assertEqual(results[1][1], urls[1])
+
+    @patch('fetcharoo.async_downloader.download_pdf')
+    def test_handles_failures_gracefully(self, mock_download):
+        mock_download.side_effect = [b'%PDF content', None, b'%PDF content']
+        urls = ['https://example.com/a.pdf', 'https://example.com/b.pdf', 'https://example.com/c.pdf']
+        results = download_pdfs_concurrent(urls, max_workers=3, request_delay=0.0)
+
+        self.assertEqual(len(results), 3)
+        self.assertIsNotNone(results[0][0])
+        self.assertIsNone(results[1][0])
+        self.assertIsNotNone(results[2][0])
+
+    @patch('fetcharoo.async_downloader.download_pdf')
+    def test_progress_callback_called(self, mock_download):
+        mock_download.return_value = b'%PDF content'
+        callback = MagicMock()
+        urls = ['https://example.com/a.pdf', 'https://example.com/b.pdf']
+        download_pdfs_concurrent(urls, max_workers=2, request_delay=0.0, progress_callback=callback)
+
+        self.assertEqual(callback.call_count, 2)
+
+    @patch('fetcharoo.async_downloader.download_pdf')
+    def test_caps_workers_to_url_count(self, mock_download):
+        mock_download.return_value = b'%PDF content'
+        urls = ['https://example.com/a.pdf', 'https://example.com/b.pdf']
+        # max_workers=100 but only 2 URLs, so only 2 workers should be used
+        results = download_pdfs_concurrent(urls, max_workers=100, request_delay=0.0)
+        self.assertEqual(len(results), 2)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_catalog.py b/tests/test_catalog.py
new file mode 100644
index 0000000..2cd2c5f
--- /dev/null
+++ b/tests/test_catalog.py
@@ -0,0 +1,209 @@
+"""Tests for the persistent document catalog."""
+
+import os
+import json
+import tempfile
+import unittest
+
+from fetcharoo.catalog import (
+    DocumentCatalog,
+    DocumentRecord,
+    DiffResult,
+    extract_pdf_metadata,
+    _url_id,
+    _content_hash,
+)
+
+
+class TestHelpers(unittest.TestCase):
+    """Test catalog helper functions."""
+
+    def test_url_id_deterministic(self):
+        id1 = _url_id('https://example.com/doc.pdf')
+        id2 = _url_id('https://example.com/doc.pdf')
+        self.assertEqual(id1, id2)
+
+    def test_url_id_different_for_different_urls(self):
+        id1 = _url_id('https://example.com/a.pdf')
+        id2 = _url_id('https://example.com/b.pdf')
+        self.assertNotEqual(id1, id2)
+
+    def test_content_hash_deterministic(self):
+        h1 = _content_hash(b'%PDF-1.4 content')
+        h2 = _content_hash(b'%PDF-1.4 content')
+        self.assertEqual(h1, h2)
+
+    def test_content_hash_different_for_different_content(self):
+        h1 = _content_hash(b'%PDF-1.4 content A')
+        h2 = _content_hash(b'%PDF-1.4 content B')
+        self.assertNotEqual(h1, h2)
+
+
+class TestDocumentCatalog(unittest.TestCase):
+    """Test the DocumentCatalog class."""
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        self.catalog = DocumentCatalog(db_path=self.tmp)
+
+    def tearDown(self):
+        self.catalog.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    def test_empty_catalog(self):
+        self.assertEqual(self.catalog.document_count, 0)
+        self.assertEqual(self.catalog.active_count, 0)
+
+    def test_record_discovery(self):
+        doc = self.catalog.record_discovery(
+            'https://example.com/test.pdf',
+            source_page='https://example.com',
+            filename='test.pdf'
+        )
+        self.assertEqual(doc.url, 'https://example.com/test.pdf')
+        self.assertEqual(doc.filename, 'test.pdf')
+        self.assertEqual(doc.status, 'active')
+        self.assertEqual(self.catalog.document_count, 1)
+
+    def test_upsert_with_content(self):
+        content = b'%PDF-1.4 test content'
+        doc = self.catalog.upsert_document(
+            'https://example.com/test.pdf',
+            content=content,
+            filename='test.pdf'
+        )
+        self.assertIsNotNone(doc.content_hash)
+        self.assertEqual(doc.size_bytes, len(content))
+        self.assertIsNotNone(doc.first_seen)
+        self.assertIsNotNone(doc.last_seen)
+
+    def test_upsert_updates_existing(self):
+        self.catalog.record_discovery('https://example.com/test.pdf', filename='test.pdf')
+        doc = self.catalog.upsert_document(
+            'https://example.com/test.pdf',
+            content=b'%PDF-1.4 new content',
+            filename='test.pdf'
+        )
+        self.assertEqual(self.catalog.document_count, 1)
+        self.assertIsNotNone(doc.content_hash)
+
+    def test_get_document(self):
+        self.catalog.record_discovery('https://example.com/test.pdf', filename='test.pdf')
+        doc = self.catalog.get_document('https://example.com/test.pdf')
+        self.assertIsNotNone(doc)
+        self.assertEqual(doc.url, 'https://example.com/test.pdf')
+
+    def test_get_document_not_found(self):
+        doc = self.catalog.get_document('https://example.com/nonexistent.pdf')
+        self.assertIsNone(doc)
+
+    def test_mark_removed(self):
+        self.catalog.record_discovery('https://example.com/test.pdf')
+        self.catalog.mark_removed('https://example.com/test.pdf')
+        doc = self.catalog.get_document('https://example.com/test.pdf')
+        self.assertEqual(doc.status, 'removed')
+
+    def test_get_active_documents(self):
+        self.catalog.record_discovery('https://example.com/a.pdf')
+        self.catalog.record_discovery('https://example.com/b.pdf')
+        self.catalog.mark_removed('https://example.com/b.pdf')
+
+        active = self.catalog.get_active_documents()
+        self.assertEqual(len(active), 1)
+        self.assertEqual(active[0].url, 'https://example.com/a.pdf')
+
+    def test_get_active_documents_by_source(self):
+        self.catalog.record_discovery('https://example.com/a.pdf', source_page='https://example.com')
+        self.catalog.record_discovery('https://other.com/b.pdf', source_page='https://other.com')
+
+        docs = self.catalog.get_active_documents(source_page='https://example.com')
+        self.assertEqual(len(docs), 1)
+
+    def test_diff_new_documents(self):
+        current_urls = ['https://example.com/a.pdf', 'https://example.com/b.pdf']
+        diff = self.catalog.diff(current_urls)
+        self.assertEqual(len(diff.new), 2)
+        self.assertEqual(len(diff.unchanged), 0)
+        self.assertEqual(len(diff.removed), 0)
+
+    def test_diff_unchanged_documents(self):
+        self.catalog.record_discovery('https://example.com/a.pdf')
+        diff = self.catalog.diff(['https://example.com/a.pdf'])
+        self.assertEqual(len(diff.new), 0)
+        self.assertEqual(len(diff.unchanged), 1)
+        self.assertEqual(len(diff.removed), 0)
+
+    def test_diff_removed_documents(self):
+        self.catalog.record_discovery('https://example.com/a.pdf')
+        diff = self.catalog.diff([])
+        self.assertEqual(len(diff.new), 0)
+        self.assertEqual(len(diff.removed), 1)
+
+    def test_diff_mixed(self):
+        self.catalog.record_discovery('https://example.com/old.pdf')
+        diff = self.catalog.diff(['https://example.com/old.pdf', 'https://example.com/new.pdf'])
+        self.assertEqual(len(diff.new), 1)
+        self.assertEqual(len(diff.unchanged), 1)
+        self.assertEqual(len(diff.removed), 0)
+
+    def test_record_run(self):
+        diff = DiffResult(
+            new=[DocumentRecord(id='1', url='https://example.com/a.pdf')],
+            unchanged=[DocumentRecord(id='2', url='https://example.com/b.pdf')],
+        )
+        run = self.catalog.record_run('https://example.com', diff)
+        self.assertIsNotNone(run.id)
+        self.assertEqual(run.documents_new, 1)
+        self.assertEqual(run.documents_found, 2)
+
+    def test_get_runs(self):
+        diff = DiffResult()
+        self.catalog.record_run('https://example.com', diff)
+        runs = self.catalog.get_runs()
+        self.assertEqual(len(runs), 1)
+
+    def test_search(self):
+        self.catalog.record_discovery('https://example.com/report_2024.pdf', filename='report_2024.pdf')
+        self.catalog.record_discovery('https://example.com/invoice.pdf', filename='invoice.pdf')
+
+        results = self.catalog.search('report')
+        self.assertEqual(len(results), 1)
+        self.assertIn('report', results[0].url)
+
+    def test_search_no_results(self):
+        results = self.catalog.search('nonexistent')
+        self.assertEqual(len(results), 0)
+
+    def test_find_duplicates(self):
+        content = b'%PDF-1.4 identical content'
+        self.catalog.upsert_document('https://site-a.com/doc.pdf', content=content)
+        self.catalog.upsert_document('https://site-b.com/doc.pdf', content=content)
+
+        dupes = self.catalog.find_duplicates()
+        self.assertEqual(len(dupes), 1)
+
+    def test_export_json(self):
+        self.catalog.record_discovery('https://example.com/test.pdf', filename='test.pdf')
+        json_str = self.catalog.export_json()
+        data = json.loads(json_str)
+        self.assertEqual(len(data), 1)
+        self.assertEqual(data[0]['url'], 'https://example.com/test.pdf')
+
+    def test_export_csv(self):
+        self.catalog.record_discovery('https://example.com/test.pdf', filename='test.pdf')
+        csv_str = self.catalog.export_csv()
+        self.assertIn('url', csv_str)
+        self.assertIn('example.com', csv_str)
+
+
+class TestExtractPdfMetadata(unittest.TestCase):
+    """Test PDF metadata extraction."""
+
+    def test_invalid_content_returns_empty(self):
+        metadata = extract_pdf_metadata(b'not a pdf')
+        self.assertEqual(metadata, {})
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_schemas_registry.py b/tests/test_schemas_registry.py
new file mode 100644
index 0000000..3837922
--- /dev/null
+++ b/tests/test_schemas_registry.py
@@ -0,0 +1,90 @@
+"""Tests for the community site schemas registry."""
+
+import unittest
+
+from fetcharoo.schemas import find_schema, list_schemas, SiteSchema
+from fetcharoo.schemas.sites import BUILTIN_SCHEMAS
+
+
+class TestSchemaRegistry(unittest.TestCase):
+    """Test the schema registry."""
+
+    def test_list_schemas_returns_all(self):
+        schemas = list_schemas()
+        self.assertGreaterEqual(len(schemas), 5)
+
+    def test_all_schemas_are_site_schemas(self):
+        for schema in list_schemas():
+            self.assertIsInstance(schema, SiteSchema)
+
+    def test_all_schemas_have_names(self):
+        for schema in list_schemas():
+            self.assertTrue(schema.name)
+
+    def test_all_schemas_have_url_patterns(self):
+        for schema in list_schemas():
+            self.assertTrue(schema.url_pattern)
+
+    def test_all_schemas_have_descriptions(self):
+        for schema in list_schemas():
+            self.assertTrue(schema.description)
+
+    def test_schema_names_are_unique(self):
+        names = [s.name for s in list_schemas()]
+        self.assertEqual(len(names), len(set(names)))
+
+
+class TestFindSchema(unittest.TestCase):
+    """Test auto-detection of schemas by URL."""
+
+    def test_find_arxiv_schema(self):
+        schema = find_schema('https://arxiv.org/abs/2301.00001')
+        self.assertIsNotNone(schema)
+        self.assertEqual(schema.name, 'arxiv')
+
+    def test_find_ietf_rfc_schema(self):
+        schema = find_schema('https://www.rfc-editor.org/rfc/rfc9110')
+        self.assertIsNotNone(schema)
+        self.assertEqual(schema.name, 'ietf_rfc')
+
+    def test_find_w3c_schema(self):
+        schema = find_schema('https://www.w3.org/TR/css-flexbox-1/')
+        self.assertIsNotNone(schema)
+        self.assertEqual(schema.name, 'w3c')
+
+    def test_find_federal_register_schema(self):
+        schema = find_schema('https://www.federalregister.gov/documents/2024/01/01/test')
+        self.assertIsNotNone(schema)
+        self.assertEqual(schema.name, 'federal_register')
+
+    def test_no_match_returns_none(self):
+        schema = find_schema('https://random-unknown-site.com/page')
+        self.assertIsNone(schema)
+
+
+class TestBuiltinSchemas(unittest.TestCase):
+    """Test individual built-in schemas."""
+
+    def test_arxiv_schema_properties(self):
+        schema = find_schema('https://arxiv.org/abs/2301.00001')
+        self.assertEqual(schema.request_delay, 1.0)  # arXiv rate limits
+
+    def test_ietf_rfc_schema_excludes_drafts(self):
+        schema = find_schema('https://www.rfc-editor.org/rfc/rfc9110')
+        self.assertIn('*draft*', schema.exclude_patterns)
+        self.assertEqual(schema.sort_by, 'numeric')
+
+    def test_sec_edgar_schema_depth(self):
+        schema = find_schema('https://www.sec.gov/Archives/edgar/data/12345')
+        self.assertIsNotNone(schema)
+        self.assertEqual(schema.recommended_depth, 2)
+
+    def test_schema_get_filter_config(self):
+        schema = find_schema('https://www.rfc-editor.org/rfc/rfc9110')
+        config = schema.get_filter_config()
+        self.assertIsNotNone(config)
+        self.assertTrue(config.filename_include or config.filename_exclude)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_watcher.py b/tests/test_watcher.py
new file mode 100644
index 0000000..e9daf90
--- /dev/null
+++ b/tests/test_watcher.py
@@ -0,0 +1,148 @@
+"""Tests for watch mode and document change monitoring."""
+
+import os
+import tempfile
+import unittest
+from unittest.mock import patch, MagicMock
+
+from fetcharoo.catalog import DocumentCatalog, DiffResult, DocumentRecord
+from fetcharoo.watcher import DocumentWatcher, diff_once
+from fetcharoo.notifications import (
+    format_diff_text,
+    format_diff_json,
+    has_changes,
+)
+
+
+class TestNotifications(unittest.TestCase):
+    """Test notification formatting."""
+
+    def _make_diff(self, new=0, changed=0, removed=0, unchanged=0):
+        return DiffResult(
+            new=[DocumentRecord(id=str(i), url=f'https://example.com/new{i}.pdf') for i in range(new)],
+            changed=[DocumentRecord(id=str(i), url=f'https://example.com/changed{i}.pdf') for i in range(changed)],
+            removed=[DocumentRecord(id=str(i), url=f'https://example.com/removed{i}.pdf') for i in range(removed)],
+            unchanged=[DocumentRecord(id=str(i), url=f'https://example.com/unchanged{i}.pdf') for i in range(unchanged)],
+        )
+
+    def test_has_changes_with_new(self):
+        diff = self._make_diff(new=1)
+        self.assertTrue(has_changes(diff))
+
+    def test_has_changes_with_removed(self):
+        diff = self._make_diff(removed=1)
+        self.assertTrue(has_changes(diff))
+
+    def test_has_changes_no_changes(self):
+        diff = self._make_diff(unchanged=3)
+        self.assertFalse(has_changes(diff))
+
+    def test_format_diff_text_new(self):
+        diff = self._make_diff(new=1)
+        text = format_diff_text(diff, 'https://example.com')
+        self.assertIn('+', text)
+        self.assertIn('new0.pdf', text)
+
+    def test_format_diff_text_removed(self):
+        diff = self._make_diff(removed=1)
+        text = format_diff_text(diff, 'https://example.com')
+        self.assertIn('-', text)
+        self.assertIn('removed0.pdf', text)
+
+    def test_format_diff_text_no_changes(self):
+        diff = self._make_diff(unchanged=1)
+        text = format_diff_text(diff, 'https://example.com')
+        self.assertIn('No changes', text)
+
+    def test_format_diff_json(self):
+        import json
+        diff = self._make_diff(new=2, removed=1)
+        result = json.loads(format_diff_json(diff, 'https://example.com'))
+        self.assertEqual(result['summary']['new'], 2)
+        self.assertEqual(result['summary']['removed'], 1)
+        self.assertEqual(len(result['new']), 2)
+
+
+class TestDocumentWatcher(unittest.TestCase):
+    """Test the DocumentWatcher class."""
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        self.catalog = DocumentCatalog(db_path=self.tmp)
+
+    def tearDown(self):
+        self.catalog.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_check_once_detects_new(self, mock_find):
+        mock_find.return_value = ['https://example.com/a.pdf', 'https://example.com/b.pdf']
+
+        watcher = DocumentWatcher('https://example.com', self.catalog)
+        diff = watcher.check_once()
+
+        self.assertEqual(len(diff.new), 2)
+        self.assertEqual(len(diff.removed), 0)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_check_once_detects_removed(self, mock_find):
+        # First: discover a doc
+        self.catalog.record_discovery('https://example.com/old.pdf')
+
+        # Then: it's gone
+        mock_find.return_value = []
+        watcher = DocumentWatcher('https://example.com', self.catalog)
+        diff = watcher.check_once()
+
+        self.assertEqual(len(diff.removed), 1)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_check_once_records_run(self, mock_find):
+        mock_find.return_value = ['https://example.com/a.pdf']
+
+        watcher = DocumentWatcher('https://example.com', self.catalog)
+        watcher.check_once()
+
+        runs = self.catalog.get_runs()
+        self.assertEqual(len(runs), 1)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_check_once_updates_catalog(self, mock_find):
+        mock_find.return_value = ['https://example.com/new.pdf']
+
+        watcher = DocumentWatcher('https://example.com', self.catalog)
+        watcher.check_once()
+
+        doc = self.catalog.get_document('https://example.com/new.pdf')
+        self.assertIsNotNone(doc)
+        self.assertEqual(doc.status, 'active')
+
+
+class TestDiffOnce(unittest.TestCase):
+    """Test the one-shot diff function."""
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        self.catalog = DocumentCatalog(db_path=self.tmp)
+
+    def tearDown(self):
+        self.catalog.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_diff_once_text_output(self, mock_find):
+        mock_find.return_value = ['https://example.com/a.pdf']
+        diff = diff_once('https://example.com', self.catalog, output_format='text')
+        self.assertEqual(len(diff.new), 1)
+
+    @patch('fetcharoo.watcher.find_pdfs_from_webpage')
+    def test_diff_once_json_output(self, mock_find):
+        mock_find.return_value = ['https://example.com/a.pdf']
+        diff = diff_once('https://example.com', self.catalog, output_format='json')
+        self.assertEqual(len(diff.new), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 89c32e82702dc6428237c5b9bbfc1ab6fe6a1911 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:35:19 +0000
Subject: [PATCH 4/6] Update README with all new features and capabilities

Rewrite README to document the 5 new enhancements: concurrent
downloads, persistent document catalog, watch mode, MCP server,
and community site schemas. Includes CLI examples, Python API
usage, and MCP server configuration.

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 README.md | 535 ++++++++++++++++++++++++------------------------------
 1 file changed, 236 insertions(+), 299 deletions(-)

diff --git a/README.md b/README.md
index 71afbbf..fa7c140 100644
--- a/README.md
+++ b/README.md
@@ -4,26 +4,69 @@
 [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
-A Python library for downloading PDF files from webpages with support for recursive link following, PDF merging, and security hardening.
+A Python library for discovering, downloading, and tracking PDF documents from websites — with persistent state, change monitoring, and AI agent integration.
+
+## What fetcharoo does
+
+**Download PDFs** from websites with recursive crawling, filtering, merging, and concurrent downloads.
+
+**Track documents over time** with a persistent SQLite catalog that remembers every PDF it has ever seen — content hashes, metadata, first/last seen dates.
+
+**Detect changes** by diffing the current state of a site against the catalog. Know instantly what's new, changed, or removed.
+
+**Integrate with AI agents** via an MCP server that exposes document discovery and tracking as tools for Claude and other AI systems.
 
 ## Features
 
-- Download PDF files from a specified webpage
-- Recursive crawling with configurable depth (up to 5 levels)
-- Merge downloaded PDFs into a single file or save separately
-- **Smart merge ordering**: Sort PDFs numerically, alphabetically, or with custom sort keys
-- **Automatic deduplication**: Remove duplicate PDF URLs across pages
-- **Custom output filenames**: Name your merged PDF files
-- **Rich result reporting**: Get detailed download statistics with `ProcessResult`
-- **Command-line interface** for quick downloads
-- **Quiet/verbose modes**: Control output verbosity with `-q` and `-v` flags
-- **robots.txt compliance** for ethical web crawling
-- **Custom User-Agent** support
-- **Dry-run mode** to preview downloads
-- **Progress bars** with tqdm integration
-- **PDF filtering** by filename, URL patterns, and size
-- **Security hardening**: Domain restriction, path traversal protection, rate limiting
-- Configurable timeouts and request delays
+### Core
+- Download PDFs from webpages with recursive crawling (up to 5 levels)
+- Merge PDFs into a single file or save separately
+- Smart merge ordering (numeric, alphabetical, custom sort keys)
+- Automatic URL deduplication across pages
+- PDF filtering by filename pattern, URL pattern, and file size
+- Dry-run mode to preview before downloading
+- Progress bars with tqdm
+- Configurable timeouts, rate limiting, and request delays
+
+### Concurrent Downloads
+- Parallel downloading with configurable thread pool
+- Thread-safe rate limiting shared across workers
+- 3-5x speedup on bulk downloads
+
+### Persistent Document Catalog
+- SQLite-backed tracking of every document across runs
+- Content-hash-based change detection (SHA-256)
+- Cross-URL deduplication (same PDF at different URLs)
+- PDF metadata extraction (title, author, page count, creation date)
+- Run history with diff summaries
+- Export as JSON or CSV
+- Search by URL or filename
+
+### Watch Mode
+- **One-shot diff** (`fetcharoo diff`) — cron-friendly, compare current state against catalog
+- **Continuous watch** (`fetcharoo watch`) — poll at intervals, notify on changes
+- Notifications: stdout, JSON, webhook (POST), shell command
+- Git-like diff output: `+` new, `~` changed, `-` removed
+
+### MCP Server
+- Expose fetcharoo as an MCP server for AI agent integration
+- Tools: `discover_pdfs`, `download_pdfs`, `catalog_query`, `catalog_diff`, `catalog_search`, `get_document_metadata`, `find_duplicate_documents`
+- Stateful: AI agents get persistent memory of document history
+- Optional dependency — install with `pip install fetcharoo[mcp]`
+
+### Site Schemas
+- Pre-built configurations for common document repositories
+- Auto-detection: `--schema auto` matches URL to optimal settings
+- Built-in schemas: arXiv, IETF RFCs, SEC EDGAR, W3C, Federal Register
+- Each schema provides: URL patterns, filtering rules, rate limits, sort strategy
+
+### Security
+- Domain restriction for recursive crawling (SSRF protection)
+- Path traversal protection on filenames
+- Rate limiting between requests
+- URL validation (http/https only)
+- robots.txt compliance (optional)
+- Custom User-Agent support
 
 ## Requirements
 
@@ -32,22 +75,11 @@ A Python library for downloading PDF files from webpages with support for recurs
 
 ## Installation
 
-### Using pip
-
 ```sh
 pip install fetcharoo
-```
-
-### From GitHub (latest)
 
-```sh
-pip install git+https://github.com/MALathon/fetcharoo.git
-```
-
-### Using Poetry
-
-```sh
-poetry add fetcharoo
+# With MCP server support:
+pip install fetcharoo[mcp]
 ```
 
 ### From source
@@ -60,360 +92,265 @@ poetry install
 
 ## Command-Line Interface
 
-fetcharoo includes a CLI for quick PDF downloads:
+### Download PDFs
 
 ```sh
 # Download PDFs from a webpage
 fetcharoo https://example.com
 
-# Download with recursion and merge into one file
+# Recursive crawl + merge into one file
 fetcharoo https://example.com -d 2 -m
 
-# Merge with custom output filename and numeric sorting
-fetcharoo https://example.com -m --output-name "textbook.pdf" --sort-by numeric
-
-# List PDFs without downloading (dry run)
-fetcharoo https://example.com --dry-run
+# Parallel download with 10 workers
+fetcharoo https://example.com --concurrent --max-workers 10
 
-# Download with custom options
-fetcharoo https://example.com -o my_pdfs --delay 1.0 --progress
+# Merge with numeric sorting and custom filename
+fetcharoo https://example.com -m --sort-by numeric --output-name "textbook.pdf"
 
-# Filter PDFs by pattern
+# Filter by filename pattern
 fetcharoo https://example.com --include "report*.pdf" --exclude "*draft*"
 
-# Quiet mode (less output) or verbose mode (more output)
-fetcharoo https://example.com -q     # Quieter
-fetcharoo https://example.com -qq    # Even quieter
-fetcharoo https://example.com -v     # More verbose
-fetcharoo https://example.com -vv    # Debug level
-```
+# Dry run (list PDFs without downloading)
+fetcharoo https://example.com --dry-run
 
-### CLI Options
+# Use auto-detected site schema
+fetcharoo https://arxiv.org/abs/2301.00001 --schema auto
 
-| Option | Description |
-|--------|-------------|
-| `-o, --output DIR` | Output directory (default: output) |
-| `-d, --depth N` | Recursion depth (default: 0) |
-| `-m, --merge` | Merge all PDFs into a single file |
-| `--output-name FILENAME` | Custom filename for merged PDF (with `--merge`) |
-| `--sort-by STRATEGY` | Sort PDFs before merging: `numeric`, `alpha`, `alpha_desc`, `none` |
-| `--dry-run` | List PDFs without downloading |
-| `--delay SECONDS` | Delay between requests (default: 0.5) |
-| `--timeout SECONDS` | Request timeout (default: 30) |
-| `--user-agent STRING` | Custom User-Agent string |
-| `--respect-robots` | Respect robots.txt rules |
-| `--progress` | Show progress bars |
-| `-q, --quiet` | Reduce output verbosity (use `-qq` for even quieter) |
-| `-v, --verbose` | Increase output verbosity (use `-vv` for debug) |
-| `--include PATTERN` | Include PDFs matching pattern |
-| `--exclude PATTERN` | Exclude PDFs matching pattern |
-| `--min-size BYTES` | Minimum PDF size |
-| `--max-size BYTES` | Maximum PDF size |
+# Track downloads in the persistent catalog
+fetcharoo https://example.com --catalog
+```
 
-## Quick Start
+### Monitor for Changes
 
-```python
-from fetcharoo import download_pdfs_from_webpage
+```sh
+# One-shot diff: what's new since last check? (great for cron)
+fetcharoo diff https://example.com
 
-# Download PDFs from a webpage and merge them into a single file
-download_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=1,
-    mode='merge',
-    write_dir='output'
-)
-```
+# Continuous watch: check every hour
+fetcharoo watch https://example.com --interval 3600
 
-## Usage
+# Watch with webhook notification
+fetcharoo watch https://example.com --notify webhook --webhook https://hooks.example.com/notify
 
-### Basic Usage
+# Watch with shell command on change
+fetcharoo watch https://example.com --notify command --on-command "echo 'New docs found!'"
 
-```python
-from fetcharoo import download_pdfs_from_webpage
-
-# Download and save PDFs as separate files
-download_pdfs_from_webpage(
-    url='https://example.com/documents',
-    recursion_depth=0,  # Only search the specified page
-    mode='separate',
-    write_dir='downloads'
-)
+# JSON output for piping
+fetcharoo diff https://example.com --format json
 ```
 
-### With robots.txt Compliance
+### Manage the Catalog
 
-```python
-from fetcharoo import download_pdfs_from_webpage
-
-# Respect robots.txt rules
-download_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=2,
-    mode='merge',
-    write_dir='output',
-    respect_robots=True,
-    user_agent='MyBot/1.0'
-)
-```
+```sh
+# Show all tracked documents
+fetcharoo catalog show
 
-### Dry-Run Mode
+# Export as JSON or CSV
+fetcharoo catalog export --format json
+fetcharoo catalog export --format csv
 
-```python
-from fetcharoo import download_pdfs_from_webpage
+# Search documents
+fetcharoo catalog search "annual report"
 
-# Preview what would be downloaded
-result = download_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=1,
-    dry_run=True
-)
+# View run history
+fetcharoo catalog runs
 
-print(f"Found {result['count']} PDFs:")
-for url in result['urls']:
-    print(f"  - {url}")
+# Find duplicate documents (same content, different URLs)
+fetcharoo catalog duplicates
 ```
 
-### With Progress Bars
+### Site Schemas
 
-```python
-from fetcharoo import download_pdfs_from_webpage
+```sh
+# List available schemas
+fetcharoo schemas list
 
-# Show progress during download
-download_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=2,
-    write_dir='output',
-    show_progress=True
-)
+# Check which schema matches a URL
+fetcharoo schemas match https://arxiv.org/abs/2301.00001
 ```
 
-### PDF Filtering
+### MCP Server
 
-```python
-from fetcharoo import download_pdfs_from_webpage, FilterConfig
+```sh
+# Start the MCP server (for AI agent integration)
+fetcharoo mcp serve
+```
 
-# Filter by filename patterns and size
-filter_config = FilterConfig(
-    filename_include=['report*.pdf', 'annual*.pdf'],
-    filename_exclude=['*draft*', '*temp*'],
-    min_size=10000,  # 10KB minimum
-    max_size=50000000  # 50MB maximum
-)
+### All Download Options
 
-download_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=1,
-    write_dir='output',
-    filter_config=filter_config
-)
-```
+| Option | Description |
+|--------|-------------|
+| `-o, --output DIR` | Output directory (default: output) |
+| `-d, --depth N` | Recursion depth (default: 0) |
+| `-m, --merge` | Merge all PDFs into a single file |
+| `--output-name FILENAME` | Custom filename for merged PDF |
+| `--sort-by STRATEGY` | Sort: `numeric`, `alpha`, `alpha_desc`, `none` |
+| `--dry-run` | List PDFs without downloading |
+| `--concurrent` | Download in parallel |
+| `--max-workers N` | Max parallel threads (default: 5) |
+| `--catalog` | Track in persistent catalog |
+| `--catalog-db PATH` | Custom catalog database path |
+| `--schema NAME` | Use site schema (`auto` for auto-detect) |
+| `--delay SECONDS` | Delay between requests (default: 0.5) |
+| `--timeout SECONDS` | Request timeout (default: 30) |
+| `--user-agent STRING` | Custom User-Agent |
+| `--respect-robots` | Respect robots.txt |
+| `--progress` | Show progress bars |
+| `-q, --quiet` | Less output (`-qq` for even quieter) |
+| `-v, --verbose` | More output (`-vv` for debug) |
+| `--include PATTERN` | Include filename pattern |
+| `--exclude PATTERN` | Exclude filename pattern |
+| `--min-size BYTES` | Minimum PDF size |
+| `--max-size BYTES` | Maximum PDF size |
+
+## Python API
 
-### With Security Options
+### Quick Start
 
 ```python
 from fetcharoo import download_pdfs_from_webpage
 
-# Restrict crawling to specific domains
+# Download PDFs — simple
+download_pdfs_from_webpage('https://example.com', write_dir='output')
+
+# Download with concurrent workers
 download_pdfs_from_webpage(
-    url='https://example.com',
+    'https://example.com',
     recursion_depth=2,
     mode='merge',
-    write_dir='output',
-    allowed_domains={'example.com', 'docs.example.com'},
-    request_delay=1.0,  # 1 second between requests
-    timeout=60  # 60 second timeout
+    concurrent=True,
+    max_workers=10,
+    show_progress=True,
 )
 ```
 
-### Sorting and Merging
+### Document Catalog
 
 ```python
-from fetcharoo import download_pdfs_from_webpage
+from fetcharoo import DocumentCatalog
 
-# Merge chapters in numeric order (chapter_1.pdf, chapter_2.pdf, chapter_10.pdf)
-download_pdfs_from_webpage(
-    url='https://example.com/book',
-    mode='merge',
-    write_dir='output',
-    sort_by='numeric',
-    output_name='complete_book.pdf'
+catalog = DocumentCatalog()  # defaults to ~/.fetcharoo/catalog.db
+
+# Track a document
+catalog.upsert_document(
+    'https://example.com/report.pdf',
+    content=pdf_bytes,
+    source_page='https://example.com',
+    filename='report.pdf',
 )
 
-# Custom sort key function
-from fetcharoo import process_pdfs, find_pdfs_from_webpage
+# Search
+results = catalog.search('annual report')
 
-pdf_urls = find_pdfs_from_webpage('https://example.com')
-process_pdfs(
-    pdf_urls,
-    write_dir='output',
-    mode='merge',
-    sort_key=lambda url: url.split('/')[-1]  # Sort by filename
-)
+# Find duplicates (same content at different URLs)
+dupes = catalog.find_duplicates()
+
+# Diff against current state
+diff = catalog.diff(['https://example.com/a.pdf', 'https://example.com/b.pdf'])
+print(f"New: {len(diff.new)}, Removed: {len(diff.removed)}")
+
+# Export
+print(catalog.export_json())
+print(catalog.export_csv())
 ```
 
-### Using ProcessResult
+### Watch Mode
 
 ```python
-from fetcharoo import download_pdfs_from_webpage
+from fetcharoo import DocumentCatalog, DocumentWatcher
 
-# Get detailed results from download operation
-result = download_pdfs_from_webpage(
-    url='https://example.com',
-    mode='separate',
-    write_dir='output'
-)
+catalog = DocumentCatalog()
+watcher = DocumentWatcher('https://example.com', catalog, recursion_depth=1)
 
-# ProcessResult provides detailed information
-print(f"Success: {result.success}")
-print(f"Downloaded: {result.downloaded_count}")
-print(f"Failed: {result.failed_count}")
-print(f"Files created: {result.files_created}")
-print(f"Errors: {result.errors}")
+# One-shot check
+diff = watcher.check_once()
+for doc in diff.new:
+    print(f"New: {doc.url}")
 
-# ProcessResult is truthy when successful
-if result:
-    print("Download completed!")
+# Or use the convenience function
+from fetcharoo import diff_once
+diff = diff_once('https://example.com', catalog)
 ```
 
-### Finding PDFs Without Downloading
+### Site Schemas
 
 ```python
-from fetcharoo import find_pdfs_from_webpage
+from fetcharoo import find_schema, list_schemas
 
-# Just get the list of PDF URLs (deduplicated by default)
-pdf_urls = find_pdfs_from_webpage(
-    url='https://example.com',
-    recursion_depth=1
-)
+# Auto-detect schema for a URL
+schema = find_schema('https://arxiv.org/abs/2301.00001')
+print(schema.name)           # 'arxiv'
+print(schema.request_delay)  # 1.0 (arXiv rate-limits)
 
-for url in pdf_urls:
-    print(url)
+# List all available schemas
+for s in list_schemas():
+    print(f"{s.name}: {s.description}")
 ```
 
-### Custom User-Agent
+### Filtering
 
 ```python
-from fetcharoo import download_pdfs_from_webpage, set_default_user_agent
+from fetcharoo import download_pdfs_from_webpage, FilterConfig
 
-# Set a global default User-Agent
-set_default_user_agent('MyCompanyBot/1.0 (contact@example.com)')
+filter_config = FilterConfig(
+    filename_include=['report*.pdf', 'annual*.pdf'],
+    filename_exclude=['*draft*', '*temp*'],
+    url_include=['*/reports/*'],
+    url_exclude=['*/archive/*'],
+    min_size=10_000,      # 10KB minimum
+    max_size=50_000_000,  # 50MB maximum
+)
 
-# Or use per-request User-Agent
 download_pdfs_from_webpage(
-    url='https://example.com',
-    user_agent='SpecificBot/2.0'
+    'https://example.com',
+    filter_config=filter_config,
 )
 ```
 
-## API Reference
-
-### `download_pdfs_from_webpage()`
-
-Main function to find and download PDFs from a webpage.
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `url` | str | required | The webpage URL to search |
-| `recursion_depth` | int | 0 | How many levels of links to follow (max 5) |
-| `mode` | str | 'separate' | 'merge' or 'separate' |
-| `write_dir` | str | 'output' | Output directory for PDFs |
-| `allowed_domains` | set | None | Restrict crawling to these domains |
-| `request_delay` | float | 0.5 | Seconds between requests |
-| `timeout` | int | 30 | Request timeout in seconds |
-| `respect_robots` | bool | False | Whether to respect robots.txt |
-| `user_agent` | str | None | Custom User-Agent (uses default if None) |
-| `dry_run` | bool | False | Preview URLs without downloading |
-| `show_progress` | bool | False | Show progress bars |
-| `filter_config` | FilterConfig | None | PDF filtering configuration |
-| `sort_by` | str | None | Sort strategy: 'numeric', 'alpha', 'alpha_desc', 'none' |
-| `sort_key` | callable | None | Custom sort key function |
-| `output_name` | str | None | Custom filename for merged PDF |
-
-**Returns:** `ProcessResult` object with download statistics, or dict in dry-run mode.
-
-### `find_pdfs_from_webpage()`
-
-Find PDF URLs without downloading.
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `url` | str | required | The webpage URL to search |
-| `recursion_depth` | int | 0 | How many levels of links to follow |
-| `deduplicate` | bool | True | Remove duplicate PDF URLs |
-| ... | | | (plus other parameters from above) |
-
-### `process_pdfs()`
-
-Download and save a list of PDF URLs.
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `pdf_links` | list | required | List of PDF URLs to download |
-| `write_dir` | str | required | Output directory |
-| `mode` | str | 'separate' | 'merge' or 'separate' |
-| `sort_by` | str | None | Sort strategy for merging |
-| `sort_key` | callable | None | Custom sort key function |
-| `output_name` | str | None | Custom merged filename |
-
-**Returns:** `ProcessResult` object with download statistics.
-
-### `ProcessResult`
-
-Dataclass returned by download operations:
+### ProcessResult
 
 ```python
-from fetcharoo import ProcessResult
-
-# Attributes:
-result.success        # bool: True if any PDFs were processed
-result.files_created  # List[str]: Paths to created files
-result.downloaded_count  # int: Number of successful downloads
-result.filtered_count    # int: Number of PDFs filtered out
-result.failed_count      # int: Number of failed downloads
-result.errors           # List[str]: Error messages
-
-# ProcessResult is truthy when successful:
-if result:
-    print("Success!")
-```
+from fetcharoo import download_pdfs_from_webpage
 
-### `FilterConfig`
+result = download_pdfs_from_webpage('https://example.com')
 
-Configuration for PDF filtering:
+print(result.success)          # bool
+print(result.downloaded_count) # int
+print(result.failed_count)     # int
+print(result.filtered_count)   # int
+print(result.files_created)    # List[str]
+print(result.errors)           # List[str]
 
-```python
-from fetcharoo import FilterConfig
-
-config = FilterConfig(
-    filename_include=['*.pdf'],      # Patterns to include
-    filename_exclude=['*draft*'],    # Patterns to exclude
-    url_include=['*/reports/*'],     # URL patterns to include
-    url_exclude=['*/temp/*'],        # URL patterns to exclude
-    min_size=1000,                   # Minimum size in bytes
-    max_size=100000000               # Maximum size in bytes
-)
+if result:  # truthy when successful
+    print("Done!")
 ```
 
-### Utility Functions
-
-- `merge_pdfs()` - Merge multiple PDF documents
-- `is_valid_url()` - Validate URL format and scheme
-- `is_safe_domain()` - Check if domain is allowed
-- `sanitize_filename()` - Prevent path traversal attacks
-- `check_robots_txt()` - Check robots.txt permissions
-- `set_default_user_agent()` - Set default User-Agent
-- `get_default_user_agent()` - Get current default User-Agent
+## MCP Server Configuration
 
-## Security Features
+Add fetcharoo to your Claude Code or MCP client configuration:
 
-fetcharoo includes several security measures:
+```json
+{
+  "mcpServers": {
+    "fetcharoo": {
+      "command": "fetcharoo",
+      "args": ["mcp", "serve"]
+    }
+  }
+}
+```
 
-- **Domain restriction**: Limit recursive crawling to specified domains (SSRF protection)
-- **Path traversal protection**: Sanitizes filenames to prevent directory escape
-- **Rate limiting**: Configurable delays between requests
-- **Timeout handling**: Prevents hanging on slow servers
-- **URL validation**: Only allows http/https schemes
-- **robots.txt compliance**: Optional respect for crawling rules
+Once connected, AI agents can use these tools:
+
+| Tool | Description |
+|------|-------------|
+| `discover_pdfs` | Find all PDFs on a URL with filtering |
+| `download_pdfs` | Download with full reliability (retry, rate limit, dedup) |
+| `catalog_query` | Query persistent document memory |
+| `catalog_diff` | What's changed since last check? |
+| `catalog_search` | Search across all tracked documents |
+| `get_document_metadata` | Detailed info about a tracked document |
+| `find_duplicate_documents` | Same content at different URLs |
 
 ## Contributing
 

From 83eb01220e2bc9bdb5d1d19c43ddd196f79f6391 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:45:35 +0000
Subject: [PATCH 5/6] Add MCP caching proxy and snapshot monitoring
 (Enhancement 6)

MCP Caching Proxy (mcp_proxy.py):
  - Wraps any upstream MCP server as a caching layer (Redis for MCP)
  - SQLite-backed ToolCache with TTL-based freshness
  - Content-hash change detection across cached calls
  - Meta-tools: _proxy_call, _cache_status, _cache_history,
    _cache_refresh, _cache_clear
  - CLI: fetcharoo proxy --server "npx trial-guide" --ttl 3600

Snapshot Monitoring (mcp_monitor.py):
  - SnapshotStore for tracking MCP tool outputs over time
  - Content-hash diffing: new/changed/removed/unchanged records
  - Works with any data source (MCP servers, REST APIs, files)
  - Nested field extraction via dot notation for record IDs
  - CLI: fetcharoo monitor {snapshot|sources|history|search}

Clinical Trials Preset (presets/clinical_trials.py):
  - Pre-configured for ClinicalTrials.gov API v2 data model
  - NCT ID extraction, human-readable formatting
  - Works with trial-guide and other clinical trials MCP servers

MCP Server updates:
  - Added snapshot_monitor, snapshot_query, snapshot_history,
    snapshot_sources, snapshot_search tools
  - AI agents get persistent change tracking for any data

All 372 tests pass (337 existing + 35 new).

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 README.md                            | 101 ++++++
 fetcharoo/__init__.py                |   5 +
 fetcharoo/cli.py                     | 143 +++++++-
 fetcharoo/mcp_monitor.py             | 516 +++++++++++++++++++++++++++
 fetcharoo/mcp_proxy.py               | 419 ++++++++++++++++++++++
 fetcharoo/mcp_server.py              | 113 ++++++
 fetcharoo/presets/__init__.py        |   9 +
 fetcharoo/presets/clinical_trials.py | 140 ++++++++
 tests/test_mcp_monitor.py            | 290 +++++++++++++++
 9 files changed, 1735 insertions(+), 1 deletion(-)
 create mode 100644 fetcharoo/mcp_monitor.py
 create mode 100644 fetcharoo/mcp_proxy.py
 create mode 100644 fetcharoo/presets/__init__.py
 create mode 100644 fetcharoo/presets/clinical_trials.py
 create mode 100644 tests/test_mcp_monitor.py

diff --git a/README.md b/README.md
index fa7c140..9559390 100644
--- a/README.md
+++ b/README.md
@@ -351,6 +351,107 @@ Once connected, AI agents can use these tools:
 | `catalog_search` | Search across all tracked documents |
 | `get_document_metadata` | Detailed info about a tracked document |
 | `find_duplicate_documents` | Same content at different URLs |
+| `snapshot_monitor` | Snapshot any data and diff against previous |
+| `snapshot_query` | Get current records for a monitored source |
+| `snapshot_sources` | List all monitored data sources |
+| `snapshot_search` | Search across all snapshot records |
+
+## MCP Caching Proxy
+
+fetcharoo can wrap **any** MCP server as a caching proxy — like Redis for MCP. It sits between your AI agent and the upstream server, caching tool call results and tracking changes over time.
+
+```
+AI Agent <--MCP--> fetcharoo proxy <--MCP--> upstream server
+```
+
+### Setup
+
+```sh
+# Wrap any MCP server with caching (1-hour TTL)
+fetcharoo proxy --server "npx trial-guide" --ttl 3600
+
+# Or with a Python MCP server
+fetcharoo proxy --server "python my_server.py" --ttl 1800
+```
+
+In Claude Desktop / Claude Code config:
+```json
+{
+  "mcpServers": {
+    "trial-guide-cached": {
+      "command": "fetcharoo",
+      "args": ["proxy", "--server", "npx trial-guide", "--ttl", "3600"]
+    }
+  }
+}
+```
+
+The proxy automatically adds these meta-tools:
+
+| Tool | Description |
+|------|-------------|
+| `_proxy_call` | Call any upstream tool through the cache |
+| `_cache_status` | Show all cached entries and their freshness |
+| `_cache_history` | View change history for cached calls |
+| `_cache_refresh` | Force-refresh a cached call (bypass TTL) |
+| `_cache_clear` | Clear cache entries |
+
+### Example: Clinical Trials
+
+```sh
+# Wrap a clinical trials MCP server (e.g., trial-guide)
+fetcharoo proxy --server "npx trial-guide" --ttl 7200
+
+# Now Claude can call trial-guide tools through the cache:
+# - First call: hits upstream, caches result
+# - Subsequent calls within 2 hours: served from cache
+# - Cache refresh: shows what changed since last call
+```
+
+## Snapshot Monitoring
+
+Monitor any data source for changes over time by snapshotting results and diffing.
+
+### CLI
+
+```sh
+# Snapshot an MCP tool's output and diff against previous
+fetcharoo monitor snapshot \
+    --server "npx trial-guide" \
+    --tool search_studies \
+    --params '{"query.cond": "diabetes", "filter.overallStatus": "RECRUITING"}' \
+    --record-id-field "protocolSection.identificationModule.nctId"
+
+# List all monitored sources
+fetcharoo monitor sources
+
+# View snapshot history
+fetcharoo monitor history --source "search_studies:a1b2c3d4"
+
+# Search across all snapshots
+fetcharoo monitor search "diabetes"
+```
+
+### Python API
+
+```python
+from fetcharoo import SnapshotStore, snapshot_data
+
+store = SnapshotStore()
+
+# Snapshot any list of records (from any source)
+trials = [
+    {"nctId": "NCT001", "title": "Trial A", "status": "RECRUITING"},
+    {"nctId": "NCT002", "title": "Trial B", "status": "ACTIVE"},
+]
+diff = snapshot_data(store, "diabetes-trials", trials, record_id_field="nctId")
+
+print(f"New: {len(diff.new)}")
+print(f"Changed: {len(diff.changed)}")
+print(f"Removed: {len(diff.removed)}")
+
+# Run again later with updated data — only changes are reported
+```
 
 ## Contributing
 
diff --git a/fetcharoo/__init__.py b/fetcharoo/__init__.py
index 11ef18d..68b084d 100644
--- a/fetcharoo/__init__.py
+++ b/fetcharoo/__init__.py
@@ -34,6 +34,7 @@
 from fetcharoo.catalog import DocumentCatalog, DocumentRecord, DiffResult
 from fetcharoo.watcher import DocumentWatcher, diff_once
 from fetcharoo.schemas import SiteSchema, find_schema, list_schemas
+from fetcharoo.mcp_monitor import SnapshotStore, SnapshotDiff, snapshot_data
 
 __version__ = "0.3.0"
 
@@ -81,6 +82,10 @@
     "SiteSchema",
     "find_schema",
     "list_schemas",
+    # Snapshot monitoring
+    "SnapshotStore",
+    "SnapshotDiff",
+    "snapshot_data",
     # Version
     "__version__",
 ]
diff --git a/fetcharoo/cli.py b/fetcharoo/cli.py
index 4209d51..73d80d0 100644
--- a/fetcharoo/cli.py
+++ b/fetcharoo/cli.py
@@ -18,7 +18,7 @@
 from fetcharoo.filtering import FilterConfig
 
 # Subcommands that the CLI recognizes
-SUBCOMMANDS = {'diff', 'watch', 'catalog', 'schemas', 'mcp'}
+SUBCOMMANDS = {'diff', 'watch', 'catalog', 'schemas', 'mcp', 'proxy', 'monitor'}
 
 
 def configure_logging(quiet: int, verbose: int) -> None:
@@ -525,6 +525,143 @@ def _handle_mcp(argv: list) -> int:
     return 0
 
 
+def _handle_proxy(argv: list) -> int:
+    """Handle the 'proxy' subcommand — MCP caching proxy."""
+    parser = argparse.ArgumentParser(
+        prog='fetcharoo proxy',
+        description='Start a caching MCP proxy that wraps any upstream MCP server.',
+    )
+    parser.add_argument('--server', type=str, required=True, help='command to start upstream MCP server (e.g., "npx trial-guide")')
+    parser.add_argument('--ttl', type=float, default=3600, help='cache TTL in seconds (default: 3600, 0=no cache)')
+    parser.add_argument('--cache-db', type=str, help='path to cache database')
+
+    args = parser.parse_args(argv)
+
+    from fetcharoo.mcp_proxy import run_proxy
+    run_proxy(args.server, ttl=args.ttl, cache_db_path=args.cache_db)
+    return 0
+
+
+def _handle_monitor(argv: list) -> int:
+    """Handle the 'monitor' subcommand — snapshot and diff MCP tool outputs."""
+    if not argv:
+        print("Usage: fetcharoo monitor {snapshot|diff|sources|history|search}")
+        return 1
+
+    action = argv[0]
+    rest = argv[1:]
+
+    if action == 'snapshot':
+        parser = argparse.ArgumentParser(prog='fetcharoo monitor snapshot')
+        parser.add_argument('--server', type=str, required=True, help='MCP server command')
+        parser.add_argument('--tool', type=str, required=True, help='tool name to call')
+        parser.add_argument('--params', type=str, default='{}', help='JSON tool params')
+        parser.add_argument('--record-id-field', type=str, default='id', help='dot-notation path to record ID')
+        parser.add_argument('--results-field', type=str, help='dot-notation path to results array')
+        parser.add_argument('--source-key', type=str, help='custom source key name')
+        parser.add_argument('--catalog-db', type=str, help='database path')
+
+        args = parser.parse_args(rest)
+        params = json.loads(args.params)
+
+        import asyncio
+        from fetcharoo.mcp_monitor import SnapshotStore, snapshot_mcp_tool
+
+        store = SnapshotStore(db_path=args.catalog_db)
+        try:
+            diff = asyncio.run(snapshot_mcp_tool(
+                store=store,
+                server_command=args.server.split(),
+                tool_name=args.tool,
+                tool_params=params,
+                record_id_field=args.record_id_field,
+                source_key=args.source_key,
+                results_field=args.results_field,
+            ))
+            print(f"Source: {diff.source_key}")
+            print(f"  {diff.summary}")
+            if diff.new:
+                for r in diff.new:
+                    print(f"  + {r.record_id}")
+            if diff.changed:
+                for r in diff.changed:
+                    print(f"  ~ {r.record_id}")
+            if diff.removed:
+                for r in diff.removed:
+                    print(f"  - {r.record_id}")
+            return 0 if diff.has_changes else 1
+        finally:
+            store.close()
+
+    elif action == 'sources':
+        from fetcharoo.mcp_monitor import SnapshotStore
+        parser = argparse.ArgumentParser(prog='fetcharoo monitor sources')
+        parser.add_argument('--catalog-db', type=str, help='database path')
+        args = parser.parse_args(rest)
+
+        store = SnapshotStore(db_path=args.catalog_db)
+        try:
+            sources = store.list_sources()
+            if not sources:
+                print("No monitored sources.")
+                return 0
+            print(f"Monitored sources ({len(sources)}):")
+            for s in sources:
+                print(f"  {s['source_key']}: {s['active_count']} active records (last: {s['last_updated']})")
+            return 0
+        finally:
+            store.close()
+
+    elif action == 'history':
+        from fetcharoo.mcp_monitor import SnapshotStore
+        parser = argparse.ArgumentParser(prog='fetcharoo monitor history')
+        parser.add_argument('--source', type=str, help='filter by source key')
+        parser.add_argument('--limit', type=int, default=20)
+        parser.add_argument('--catalog-db', type=str, help='database path')
+        args = parser.parse_args(rest)
+
+        store = SnapshotStore(db_path=args.catalog_db)
+        try:
+            history = store.get_snapshot_history(args.source, args.limit)
+            if not history:
+                print("No snapshot history.")
+                return 0
+            print(f"Snapshot history ({len(history)}):")
+            for h in history:
+                print(f"  {h['timestamp']} | {h['source_key']}")
+                print(f"    records={h['record_count']} new={h['new_count']} "
+                      f"changed={h['changed_count']} removed={h['removed_count']}")
+            return 0
+        finally:
+            store.close()
+
+    elif action == 'search':
+        from fetcharoo.mcp_monitor import SnapshotStore
+        parser = argparse.ArgumentParser(prog='fetcharoo monitor search')
+        parser.add_argument('query', type=str, help='search string')
+        parser.add_argument('--source', type=str, help='filter by source key')
+        parser.add_argument('--catalog-db', type=str, help='database path')
+        args = parser.parse_args(rest)
+
+        store = SnapshotStore(db_path=args.catalog_db)
+        try:
+            results = store.search_records(args.query, args.source)
+            if not results:
+                print(f"No records matching '{args.query}'")
+                return 1
+            print(f"Found {len(results)} record(s):")
+            for r in results:
+                print(f"  [{r['source_key']}] {r['record_id']}")
+            return 0
+        finally:
+            store.close()
+
+    else:
+        print(f"Unknown monitor action: {action}")
+        print("Usage: fetcharoo monitor {snapshot|sources|history|search}")
+        return 1
+
+
 def main(argv: Optional[list] = None) -> int:
     """
     Main entry point for the CLI.
@@ -553,6 +690,10 @@ def main(argv: Optional[list] = None) -> int:
                 return _handle_schemas(rest)
             elif command == 'mcp':
                 return _handle_mcp(rest)
+            elif command == 'proxy':
+                return _handle_proxy(rest)
+            elif command == 'monitor':
+                return _handle_monitor(rest)
         except KeyboardInterrupt:
             print("\n\nOperation cancelled by user.")
             return 1
diff --git a/fetcharoo/mcp_monitor.py b/fetcharoo/mcp_monitor.py
new file mode 100644
index 0000000..e352245
--- /dev/null
+++ b/fetcharoo/mcp_monitor.py
@@ -0,0 +1,516 @@
+"""
+MCP source monitoring for fetcharoo.
+
+Snapshots the output of MCP server tools over time and diffs against
+previous snapshots to detect changes. Avoids wasteful repeated querying
+by storing full snapshots and only surfacing what's new/changed/removed.
+
+Works with any MCP server — clinical trials, document repositories,
+data feeds, etc.
+
+Usage (CLI):
+    fetcharoo monitor snapshot --server "python clinical_trials_server.py" \\
+        --tool search_studies --params '{"query.cond": "diabetes"}' \\
+        --record-id-field "protocolSection.identificationModule.nctId"
+
+    fetcharoo monitor diff --source "search_studies:diabetes"
+
+Usage (Python API):
+    from fetcharoo.mcp_monitor import SnapshotStore, snapshot_mcp_tool
+
+    store = SnapshotStore()
+    result = await snapshot_mcp_tool(
+        store=store,
+        server_command=["python", "clinical_trials_server.py"],
+        tool_name="search_studies",
+        tool_params={"query.cond": "diabetes"},
+        record_id_field="protocolSection.identificationModule.nctId",
+    )
+    print(f"New: {len(result.new)}, Changed: {len(result.changed)}, Removed: {len(result.removed)}")
+"""
+
+import hashlib
+import json
+import logging
+import os
+import sqlite3
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Union
+
+logger = logging.getLogger('fetcharoo')
+
+
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _hash_json(data: Any) -> str:
+    """Deterministic hash of a JSON-serializable value."""
+    serialized = json.dumps(data, sort_keys=True, default=str)
+    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()
+
+
+def _extract_nested(obj: Any, dotted_key: str) -> Any:
+    """
+    Extract a value from a nested dict/list using dot notation.
+
+    Examples:
+        _extract_nested({"a": {"b": 1}}, "a.b") -> 1
+        _extract_nested({"items": [{"id": 1}]}, "items.0.id") -> 1
+    """
+    parts = dotted_key.split('.')
+    current = obj
+    for part in parts:
+        if current is None:
+            return None
+        if isinstance(current, dict):
+            current = current.get(part)
+        elif isinstance(current, (list, tuple)):
+            try:
+                current = current[int(part)]
+            except (ValueError, IndexError):
+                return None
+        else:
+            return None
+    return current
+
+
+@dataclass
+class SnapshotRecord:
+    """A single record within a snapshot."""
+    record_id: str
+    content_hash: str
+    data: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class SnapshotDiff:
+    """Result of comparing current snapshot against previous."""
+    source_key: str
+    timestamp: str = ''
+    new: List[SnapshotRecord] = field(default_factory=list)
+    changed: List[SnapshotRecord] = field(default_factory=list)
+    removed: List[SnapshotRecord] = field(default_factory=list)
+    unchanged: List[SnapshotRecord] = field(default_factory=list)
+
+    @property
+    def has_changes(self) -> bool:
+        return bool(self.new or self.changed or self.removed)
+
+    @property
+    def summary(self) -> str:
+        return (
+            f"new={len(self.new)} changed={len(self.changed)} "
+            f"removed={len(self.removed)} unchanged={len(self.unchanged)}"
+        )
+
+
+class SnapshotStore:
+    """
+    SQLite-backed store for MCP tool output snapshots.
+
+    Each "source" is identified by a key (e.g., "search_studies:diabetes").
+    Each source has records identified by a record_id extracted from the data.
+    Records are tracked across snapshots via content hashing.
+    """
+
+    def __init__(self, db_path: Optional[str] = None):
+        if db_path is None:
+            catalog_dir = os.path.join(os.path.expanduser('~'), '.fetcharoo')
+            os.makedirs(catalog_dir, exist_ok=True)
+            db_path = os.path.join(catalog_dir, 'catalog.db')
+
+        self.db_path = db_path
+        self._conn = sqlite3.connect(db_path)
+        self._conn.row_factory = sqlite3.Row
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        with self._conn:
+            self._conn.executescript("""
+                CREATE TABLE IF NOT EXISTS snapshots (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    source_key TEXT NOT NULL,
+                    timestamp TEXT NOT NULL,
+                    record_count INTEGER DEFAULT 0,
+                    new_count INTEGER DEFAULT 0,
+                    changed_count INTEGER DEFAULT 0,
+                    removed_count INTEGER DEFAULT 0
+                );
+
+                CREATE TABLE IF NOT EXISTS snapshot_records (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    source_key TEXT NOT NULL,
+                    record_id TEXT NOT NULL,
+                    content_hash TEXT NOT NULL,
+                    data TEXT NOT NULL DEFAULT '{}',
+                    first_seen TEXT,
+                    last_seen TEXT,
+                    last_changed TEXT,
+                    status TEXT DEFAULT 'active'
+                );
+
+                CREATE INDEX IF NOT EXISTS idx_snap_records_source
+                    ON snapshot_records(source_key);
+                CREATE INDEX IF NOT EXISTS idx_snap_records_source_record
+                    ON snapshot_records(source_key, record_id);
+                CREATE INDEX IF NOT EXISTS idx_snap_records_status
+                    ON snapshot_records(status);
+                CREATE INDEX IF NOT EXISTS idx_snapshots_source
+                    ON snapshots(source_key);
+            """)
+
+    def take_snapshot(
+        self,
+        source_key: str,
+        records: List[Dict[str, Any]],
+        record_id_field: str,
+    ) -> SnapshotDiff:
+        """
+        Store a new snapshot and diff against the previous one.
+
+        Args:
+            source_key: Identifier for this data source (e.g., "search_studies:diabetes").
+            records: List of dicts — the raw MCP tool output records.
+            record_id_field: Dot-notation path to the unique ID within each record
+                            (e.g., "protocolSection.identificationModule.nctId").
+
+        Returns:
+            SnapshotDiff showing what changed.
+        """
+        now = _now_iso()
+
+        # Build current records with IDs and hashes
+        current: Dict[str, SnapshotRecord] = {}
+        for item in records:
+            rid = _extract_nested(item, record_id_field)
+            if rid is None:
+                # Try using the whole item hash as ID
+                rid = _hash_json(item)[:16]
+            rid = str(rid)
+            current[rid] = SnapshotRecord(
+                record_id=rid,
+                content_hash=_hash_json(item),
+                data=item,
+            )
+
+        # Load previous records for this source
+        previous = self._get_active_records(source_key)
+
+        # Compute diff
+        diff = SnapshotDiff(source_key=source_key, timestamp=now)
+
+        for rid, rec in current.items():
+            if rid not in previous:
+                diff.new.append(rec)
+                self._upsert_record(source_key, rec, now, is_new=True)
+            elif previous[rid]['content_hash'] != rec.content_hash:
+                diff.changed.append(rec)
+                self._upsert_record(source_key, rec, now, is_new=False, changed=True)
+            else:
+                diff.unchanged.append(rec)
+                self._touch_record(source_key, rid, now)
+
+        for rid, row in previous.items():
+            if rid not in current:
+                removed_rec = SnapshotRecord(
+                    record_id=rid,
+                    content_hash=row['content_hash'],
+                    data=json.loads(row['data']),
+                )
+                diff.removed.append(removed_rec)
+                self._mark_removed(source_key, rid, now)
+
+        # Record the snapshot
+        with self._conn:
+            self._conn.execute(
+                """INSERT INTO snapshots
+                   (source_key, timestamp, record_count, new_count, changed_count, removed_count)
+                   VALUES (?, ?, ?, ?, ?, ?)""",
+                (source_key, now, len(current), len(diff.new),
+                 len(diff.changed), len(diff.removed))
+            )
+
+        return diff
+
+    def get_current_records(self, source_key: str) -> List[Dict[str, Any]]:
+        """Get all active records for a source."""
+        rows = self._conn.execute(
+            "SELECT * FROM snapshot_records WHERE source_key = ? AND status = 'active' ORDER BY record_id",
+            (source_key,)
+        ).fetchall()
+        return [json.loads(r['data']) for r in rows]
+
+    def get_record(self, source_key: str, record_id: str) -> Optional[Dict[str, Any]]:
+        """Get a specific record by source and ID."""
+        row = self._conn.execute(
+            "SELECT * FROM snapshot_records WHERE source_key = ? AND record_id = ?",
+            (source_key, record_id)
+        ).fetchone()
+        if row is None:
+            return None
+        return {
+            'record_id': row['record_id'],
+            'content_hash': row['content_hash'],
+            'data': json.loads(row['data']),
+            'first_seen': row['first_seen'],
+            'last_seen': row['last_seen'],
+            'last_changed': row['last_changed'],
+            'status': row['status'],
+        }
+
+    def get_snapshot_history(self, source_key: Optional[str] = None, limit: int = 20) -> List[Dict]:
+        """Get snapshot run history."""
+        if source_key:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshots WHERE source_key = ? ORDER BY timestamp DESC LIMIT ?",
+                (source_key, limit)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshots ORDER BY timestamp DESC LIMIT ?",
+                (limit,)
+            ).fetchall()
+        return [dict(r) for r in rows]
+
+    def list_sources(self) -> List[Dict[str, Any]]:
+        """List all tracked sources with their record counts."""
+        rows = self._conn.execute("""
+            SELECT source_key, COUNT(*) as record_count,
+                   SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active_count,
+                   MAX(last_seen) as last_updated
+            FROM snapshot_records
+            GROUP BY source_key
+            ORDER BY source_key
+        """).fetchall()
+        return [dict(r) for r in rows]
+
+    def search_records(self, query: str, source_key: Optional[str] = None) -> List[Dict[str, Any]]:
+        """Search across snapshot records by data content."""
+        pattern = f"%{query}%"
+        if source_key:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshot_records WHERE source_key = ? AND data LIKE ? AND status = 'active'",
+                (source_key, pattern)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshot_records WHERE data LIKE ? AND status = 'active'",
+                (pattern,)
+            ).fetchall()
+        return [
+            {
+                'source_key': r['source_key'],
+                'record_id': r['record_id'],
+                'data': json.loads(r['data']),
+                'first_seen': r['first_seen'],
+                'last_seen': r['last_seen'],
+            }
+            for r in rows
+        ]
+
+    def export_json(self, source_key: Optional[str] = None) -> str:
+        """Export snapshot records as JSON."""
+        if source_key:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshot_records WHERE source_key = ? ORDER BY record_id",
+                (source_key,)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM snapshot_records ORDER BY source_key, record_id"
+            ).fetchall()
+        data = [
+            {
+                'source_key': r['source_key'],
+                'record_id': r['record_id'],
+                'content_hash': r['content_hash'],
+                'data': json.loads(r['data']),
+                'first_seen': r['first_seen'],
+                'last_seen': r['last_seen'],
+                'last_changed': r['last_changed'],
+                'status': r['status'],
+            }
+            for r in rows
+        ]
+        return json.dumps(data, indent=2)
+
+    def close(self) -> None:
+        self._conn.close()
+
+    # --- Internal helpers ---
+
+    def _get_active_records(self, source_key: str) -> Dict[str, sqlite3.Row]:
+        rows = self._conn.execute(
+            "SELECT * FROM snapshot_records WHERE source_key = ? AND status = 'active'",
+            (source_key,)
+        ).fetchall()
+        return {r['record_id']: r for r in rows}
+
+    def _upsert_record(
+        self, source_key: str, rec: SnapshotRecord, now: str,
+        is_new: bool, changed: bool = False
+    ) -> None:
+        with self._conn:
+            if is_new:
+                self._conn.execute(
+                    """INSERT OR REPLACE INTO snapshot_records
+                       (source_key, record_id, content_hash, data, first_seen, last_seen, last_changed, status)
+                       VALUES (?, ?, ?, ?, ?, ?, ?, 'active')""",
+                    (source_key, rec.record_id, rec.content_hash,
+                     json.dumps(rec.data, default=str), now, now, now)
+                )
+            else:
+                self._conn.execute(
+                    """UPDATE snapshot_records SET content_hash=?, data=?, last_seen=?,
+                       last_changed=?, status='active'
+                       WHERE source_key=? AND record_id=?""",
+                    (rec.content_hash, json.dumps(rec.data, default=str),
+                     now, now if changed else now,
+                     source_key, rec.record_id)
+                )
+
+    def _touch_record(self, source_key: str, record_id: str, now: str) -> None:
+        with self._conn:
+            self._conn.execute(
+                "UPDATE snapshot_records SET last_seen=? WHERE source_key=? AND record_id=?",
+                (now, source_key, record_id)
+            )
+
+    def _mark_removed(self, source_key: str, record_id: str, now: str) -> None:
+        with self._conn:
+            self._conn.execute(
+                "UPDATE snapshot_records SET status='removed', last_seen=? WHERE source_key=? AND record_id=?",
+                (now, source_key, record_id)
+            )
+
+
+# --- MCP Tool Snapshotting ---
+
+async def snapshot_mcp_tool(
+    store: SnapshotStore,
+    server_command: Union[str, List[str]],
+    tool_name: str,
+    tool_params: Optional[Dict[str, Any]] = None,
+    record_id_field: str = "id",
+    source_key: Optional[str] = None,
+    results_field: Optional[str] = None,
+) -> SnapshotDiff:
+    """
+    Call an MCP tool, snapshot the results, and diff against previous snapshot.
+
+    Args:
+        store: SnapshotStore for persistence.
+        server_command: Command to start the MCP server (e.g., ["python", "server.py"]).
+        tool_name: Name of the MCP tool to call.
+        tool_params: Parameters to pass to the tool.
+        record_id_field: Dot-notation path to the unique ID in each record.
+        source_key: Identifier for this source. Defaults to "{tool_name}:{params_hash}".
+        results_field: Dot-notation path to the array of records in the tool output.
+                      If None, assumes the output is already a list or tries common fields.
+
+    Returns:
+        SnapshotDiff with new/changed/removed/unchanged records.
+    """
+    try:
+        from mcp import ClientSession, StdioServerParameters
+        from mcp.client.stdio import stdio_client
+    except ImportError:
+        raise ImportError(
+            "MCP client support requires the 'mcp' package. "
+            "Install with: pip install mcp"
+        )
+
+    if tool_params is None:
+        tool_params = {}
+
+    if source_key is None:
+        params_hash = _hash_json(tool_params)[:8]
+        source_key = f"{tool_name}:{params_hash}"
+
+    if isinstance(server_command, str):
+        server_command = server_command.split()
+
+    server_params = StdioServerParameters(
+        command=server_command[0],
+        args=server_command[1:] if len(server_command) > 1 else [],
+    )
+
+    # Connect to MCP server and call the tool
+    async with stdio_client(server_params) as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+
+            result = await session.call_tool(tool_name, arguments=tool_params)
+
+            # Parse the result
+            raw_output = _parse_mcp_result(result, results_field)
+
+    return store.take_snapshot(source_key, raw_output, record_id_field)
+
+
+def snapshot_data(
+    store: SnapshotStore,
+    source_key: str,
+    records: List[Dict[str, Any]],
+    record_id_field: str = "id",
+) -> SnapshotDiff:
+    """
+    Snapshot arbitrary data (not from MCP) and diff against previous.
+
+    This is the synchronous, non-MCP entry point. Useful for:
+    - Data from REST APIs you've already fetched
+    - Data from files or databases
+    - Testing
+
+    Args:
+        store: SnapshotStore for persistence.
+        source_key: Identifier for this data source.
+        records: List of dicts to snapshot.
+        record_id_field: Dot-notation path to unique ID in each record.
+
+    Returns:
+        SnapshotDiff with new/changed/removed/unchanged records.
+    """
+    return store.take_snapshot(source_key, records, record_id_field)
+
+
+def _parse_mcp_result(result: Any, results_field: Optional[str] = None) -> List[Dict]:
+    """Parse MCP tool result into a list of records."""
+    # MCP results have a .content list with TextContent items
+    raw_text = ""
+    if hasattr(result, 'content'):
+        for item in result.content:
+            if hasattr(item, 'text'):
+                raw_text += item.text
+
+    if not raw_text:
+        return []
+
+    # Try parsing as JSON
+    try:
+        parsed = json.loads(raw_text)
+    except json.JSONDecodeError:
+        # Not JSON — treat each line as a record
+        return [{"text": line} for line in raw_text.strip().split('\n') if line.strip()]
+
+    # If a results_field is specified, extract it
+    if results_field:
+        parsed = _extract_nested(parsed, results_field)
+        if parsed is None:
+            return []
+
+    # If it's already a list, return it
+    if isinstance(parsed, list):
+        return parsed
+
+    # If it's a dict, look for common array fields
+    if isinstance(parsed, dict):
+        for key in ('results', 'studies', 'data', 'items', 'records', 'trials'):
+            if key in parsed and isinstance(parsed[key], list):
+                return parsed[key]
+        # Single record
+        return [parsed]
+
+    return []
diff --git a/fetcharoo/mcp_proxy.py b/fetcharoo/mcp_proxy.py
new file mode 100644
index 0000000..9854034
--- /dev/null
+++ b/fetcharoo/mcp_proxy.py
@@ -0,0 +1,419 @@
+"""
+MCP caching proxy for fetcharoo.
+
+Sits between an AI agent and any upstream MCP server. Intercepts tool calls,
+caches results in SQLite, serves from cache when fresh, and provides automatic
+diff/snapshot capabilities for every proxied tool.
+
+Think of it as Redis for MCP servers.
+
+Architecture:
+    AI Agent <--MCP--> fetcharoo proxy <--MCP--> upstream server (e.g., trial-guide)
+
+The proxy:
+    1. Connects to the upstream MCP server on startup
+    2. Discovers all its tools
+    3. Re-exposes each tool with caching + diff wrappers
+    4. Adds meta-tools: _cache_diff, _cache_query, _cache_sources, _cache_clear
+
+Usage:
+    # CLI
+    fetcharoo proxy --server "npx trial-guide" --ttl 3600
+
+    # This starts a new MCP server that proxies all tools from trial-guide
+    # with 1-hour caching. Connect to it from Claude Desktop like any MCP server.
+
+    # In Claude Desktop config:
+    {
+        "mcpServers": {
+            "trial-guide-cached": {
+                "command": "fetcharoo",
+                "args": ["proxy", "--server", "npx trial-guide", "--ttl", "3600"]
+            }
+        }
+    }
+"""
+
+import hashlib
+import json
+import logging
+import os
+import sqlite3
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger('fetcharoo')
+
+
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _hash_json(data: Any) -> str:
+    serialized = json.dumps(data, sort_keys=True, default=str)
+    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()
+
+
+def _cache_key(tool_name: str, arguments: Dict[str, Any]) -> str:
+    """Generate a deterministic cache key for a tool call."""
+    args_hash = _hash_json(arguments)[:12]
+    return f"{tool_name}:{args_hash}"
+
+
+class ToolCache:
+    """
+    SQLite-backed cache for MCP tool call results.
+
+    Stores tool call results with TTL-based freshness and content-hash-based
+    change detection. Supports diffing current vs. cached results.
+    """
+
+    def __init__(self, db_path: Optional[str] = None):
+        if db_path is None:
+            cache_dir = os.path.join(os.path.expanduser('~'), '.fetcharoo')
+            os.makedirs(cache_dir, exist_ok=True)
+            db_path = os.path.join(cache_dir, 'mcp_cache.db')
+
+        self.db_path = db_path
+        self._conn = sqlite3.connect(db_path)
+        self._conn.row_factory = sqlite3.Row
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        with self._conn:
+            self._conn.executescript("""
+                CREATE TABLE IF NOT EXISTS tool_cache (
+                    cache_key TEXT PRIMARY KEY,
+                    tool_name TEXT NOT NULL,
+                    arguments TEXT NOT NULL DEFAULT '{}',
+                    result_text TEXT NOT NULL,
+                    content_hash TEXT NOT NULL,
+                    cached_at TEXT NOT NULL,
+                    hit_count INTEGER DEFAULT 0,
+                    previous_hash TEXT
+                );
+
+                CREATE TABLE IF NOT EXISTS cache_history (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    cache_key TEXT NOT NULL,
+                    tool_name TEXT NOT NULL,
+                    content_hash TEXT NOT NULL,
+                    timestamp TEXT NOT NULL,
+                    changed INTEGER DEFAULT 0
+                );
+
+                CREATE INDEX IF NOT EXISTS idx_tool_cache_tool
+                    ON tool_cache(tool_name);
+                CREATE INDEX IF NOT EXISTS idx_cache_history_key
+                    ON cache_history(cache_key);
+            """)
+
+    def get(self, tool_name: str, arguments: Dict[str, Any], ttl: float = 3600) -> Optional[str]:
+        """
+        Get a cached result if it exists and is fresh.
+
+        Args:
+            tool_name: Name of the MCP tool.
+            arguments: Tool call arguments.
+            ttl: Time-to-live in seconds. 0 = always stale.
+
+        Returns:
+            Cached result text, or None if cache miss or stale.
+        """
+        key = _cache_key(tool_name, arguments)
+        row = self._conn.execute(
+            "SELECT * FROM tool_cache WHERE cache_key = ?", (key,)
+        ).fetchone()
+
+        if row is None:
+            return None
+
+        if ttl <= 0:
+            return None
+
+        cached_at = datetime.fromisoformat(row['cached_at'])
+        age = (datetime.now(timezone.utc) - cached_at).total_seconds()
+        if age > ttl:
+            return None
+
+        # Cache hit — bump counter
+        with self._conn:
+            self._conn.execute(
+                "UPDATE tool_cache SET hit_count = hit_count + 1 WHERE cache_key = ?",
+                (key,)
+            )
+
+        return row['result_text']
+
+    def put(self, tool_name: str, arguments: Dict[str, Any], result_text: str) -> bool:
+        """
+        Store a tool result in the cache.
+
+        Args:
+            tool_name: Name of the MCP tool.
+            arguments: Tool call arguments.
+            result_text: The tool's text result.
+
+        Returns:
+            True if the result is different from the previously cached value.
+        """
+        key = _cache_key(tool_name, arguments)
+        new_hash = _hash_json(result_text)
+        now = _now_iso()
+
+        # Check if content changed
+        old_row = self._conn.execute(
+            "SELECT content_hash FROM tool_cache WHERE cache_key = ?", (key,)
+        ).fetchone()
+        old_hash = old_row['content_hash'] if old_row else None
+        changed = old_hash is not None and old_hash != new_hash
+
+        with self._conn:
+            self._conn.execute(
+                """INSERT OR REPLACE INTO tool_cache
+                   (cache_key, tool_name, arguments, result_text, content_hash,
+                    cached_at, hit_count, previous_hash)
+                   VALUES (?, ?, ?, ?, ?, ?, 0, ?)""",
+                (key, tool_name, json.dumps(arguments, default=str),
+                 result_text, new_hash, now, old_hash)
+            )
+            self._conn.execute(
+                """INSERT INTO cache_history
+                   (cache_key, tool_name, content_hash, timestamp, changed)
+                   VALUES (?, ?, ?, ?, ?)""",
+                (key, tool_name, new_hash, now, 1 if changed else 0)
+            )
+
+        return changed
+
+    def get_all_entries(self, tool_name: Optional[str] = None) -> List[Dict]:
+        """List all cache entries, optionally filtered by tool name."""
+        if tool_name:
+            rows = self._conn.execute(
+                "SELECT cache_key, tool_name, arguments, cached_at, hit_count, content_hash FROM tool_cache WHERE tool_name = ?",
+                (tool_name,)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT cache_key, tool_name, arguments, cached_at, hit_count, content_hash FROM tool_cache"
+            ).fetchall()
+        return [dict(r) for r in rows]
+
+    def get_history(self, cache_key: Optional[str] = None, limit: int = 20) -> List[Dict]:
+        """Get cache change history."""
+        if cache_key:
+            rows = self._conn.execute(
+                "SELECT * FROM cache_history WHERE cache_key = ? ORDER BY timestamp DESC LIMIT ?",
+                (cache_key, limit)
+            ).fetchall()
+        else:
+            rows = self._conn.execute(
+                "SELECT * FROM cache_history ORDER BY timestamp DESC LIMIT ?",
+                (limit,)
+            ).fetchall()
+        return [dict(r) for r in rows]
+
+    def invalidate(self, tool_name: Optional[str] = None) -> int:
+        """
+        Clear cache entries.
+
+        Args:
+            tool_name: If provided, only clear entries for this tool.
+                      If None, clear everything.
+
+        Returns:
+            Number of entries removed.
+        """
+        if tool_name:
+            cursor = self._conn.execute(
+                "DELETE FROM tool_cache WHERE tool_name = ?", (tool_name,)
+            )
+        else:
+            cursor = self._conn.execute("DELETE FROM tool_cache")
+        self._conn.commit()
+        return cursor.rowcount
+
+    def close(self) -> None:
+        self._conn.close()
+
+
+def create_proxy_server(
+    upstream_command: str,
+    ttl: float = 3600,
+    cache_db_path: Optional[str] = None,
+):
+    """
+    Create an MCP proxy server that caches results from an upstream MCP server.
+
+    Args:
+        upstream_command: Shell command to start the upstream MCP server.
+        ttl: Default cache TTL in seconds (0 = no caching, always forward).
+        cache_db_path: Path to cache database.
+
+    Returns:
+        A configured FastMCP server instance.
+    """
+    try:
+        from mcp.server.fastmcp import FastMCP
+        from mcp import ClientSession, StdioServerParameters
+        from mcp.client.stdio import stdio_client
+    except ImportError:
+        raise ImportError(
+            "MCP proxy requires the 'mcp' package. "
+            "Install with: pip install mcp"
+        )
+
+    import asyncio
+
+    cache = ToolCache(db_path=cache_db_path)
+
+    parts = upstream_command.split()
+    upstream_params = StdioServerParameters(
+        command=parts[0],
+        args=parts[1:] if len(parts) > 1 else [],
+    )
+
+    proxy = FastMCP(
+        "fetcharoo-proxy",
+        description=f"Caching proxy for: {upstream_command}",
+    )
+
+    # We'll store the upstream session info for tool discovery
+    _upstream_tools: List[Dict] = []
+
+    async def _call_upstream(tool_name: str, arguments: Dict[str, Any]) -> str:
+        """Call a tool on the upstream MCP server."""
+        async with stdio_client(upstream_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                await session.initialize()
+                result = await session.call_tool(tool_name, arguments=arguments)
+
+                # Extract text from result
+                text_parts = []
+                if hasattr(result, 'content'):
+                    for item in result.content:
+                        if hasattr(item, 'text'):
+                            text_parts.append(item.text)
+                return "\n".join(text_parts)
+
+    def _call_upstream_sync(tool_name: str, arguments: Dict[str, Any]) -> str:
+        """Synchronous wrapper for calling upstream."""
+        return asyncio.run(_call_upstream(tool_name, arguments))
+
+    # --- Meta-tools (always available) ---
+
+    @proxy.tool()
+    def _cache_status() -> str:
+        """
+        Show all cached tool calls and their freshness.
+
+        Returns cache entries with their age, hit count, and whether
+        the result changed since the previous call.
+        """
+        entries = cache.get_all_entries()
+        return json.dumps({
+            "cache_entries": len(entries),
+            "ttl_seconds": ttl,
+            "entries": entries,
+        }, indent=2)
+
+    @proxy.tool()
+    def _cache_history(cache_key: Optional[str] = None, limit: int = 20) -> str:
+        """
+        View change history for cached tool calls.
+
+        Shows when each call was made and whether the result changed.
+
+        Args:
+            cache_key: Filter by specific cache key. None shows all.
+            limit: Maximum entries to return.
+        """
+        history = cache.get_history(cache_key, limit)
+        return json.dumps({
+            "history": history,
+        }, indent=2)
+
+    @proxy.tool()
+    def _cache_clear(tool_name: Optional[str] = None) -> str:
+        """
+        Clear the cache.
+
+        Args:
+            tool_name: Clear only entries for this tool. None clears everything.
+        """
+        count = cache.invalidate(tool_name)
+        return json.dumps({
+            "cleared": count,
+            "tool_name": tool_name or "all",
+        })
+
+    @proxy.tool()
+    def _cache_refresh(tool_name: str, arguments: Optional[dict] = None) -> str:
+        """
+        Force-refresh a cached tool call (bypass TTL, call upstream, cache new result).
+
+        Args:
+            tool_name: The upstream tool to call.
+            arguments: Arguments to pass. Defaults to empty dict.
+        """
+        if arguments is None:
+            arguments = {}
+
+        result_text = _call_upstream_sync(tool_name, arguments)
+        changed = cache.put(tool_name, arguments, result_text)
+
+        key = _cache_key(tool_name, arguments)
+        return json.dumps({
+            "cache_key": key,
+            "changed_since_last": changed,
+            "result": result_text,
+        }, indent=2)
+
+    @proxy.tool()
+    def _proxy_call(tool_name: str, arguments: Optional[dict] = None, bypass_cache: bool = False) -> str:
+        """
+        Call any tool on the upstream MCP server through the cache.
+
+        This is the universal proxy tool. It checks the cache first (unless
+        bypass_cache=True), calls the upstream server if needed, and caches
+        the result.
+
+        Args:
+            tool_name: Name of the upstream tool to call.
+            arguments: Arguments dict to pass to the tool.
+            bypass_cache: If True, skip cache and always call upstream.
+        """
+        if arguments is None:
+            arguments = {}
+
+        # Check cache first
+        if not bypass_cache:
+            cached = cache.get(tool_name, arguments, ttl=ttl)
+            if cached is not None:
+                key = _cache_key(tool_name, arguments)
+                return json.dumps({
+                    "_source": "cache",
+                    "_cache_key": key,
+                    "result": cached,
+                })
+
+        # Cache miss or bypass — call upstream
+        result_text = _call_upstream_sync(tool_name, arguments)
+        changed = cache.put(tool_name, arguments, result_text)
+
+        key = _cache_key(tool_name, arguments)
+        return json.dumps({
+            "_source": "upstream",
+            "_cache_key": key,
+            "_changed_since_last": changed,
+            "result": result_text,
+        }, indent=2)
+
+    return proxy
+
+
+def run_proxy(upstream_command: str, ttl: float = 3600, cache_db_path: Optional[str] = None):
+    """Start the proxy server."""
+    server = create_proxy_server(upstream_command, ttl, cache_db_path)
+    server.run()
diff --git a/fetcharoo/mcp_server.py b/fetcharoo/mcp_server.py
index bc507b9..b85f15c 100644
--- a/fetcharoo/mcp_server.py
+++ b/fetcharoo/mcp_server.py
@@ -282,6 +282,119 @@ def find_duplicate_documents() -> str:
             "duplicates": result,
         }, indent=2)
 
+    # --- Snapshot monitoring tools ---
+
+    from fetcharoo.mcp_monitor import SnapshotStore, snapshot_data
+
+    _snapshot_store = SnapshotStore()
+
+    @mcp.tool()
+    def snapshot_monitor(
+        source_key: str,
+        records: list,
+        record_id_field: str = "id",
+    ) -> str:
+        """
+        Snapshot a list of records and diff against the previous snapshot.
+
+        Use this to monitor ANY data source for changes over time — clinical trials,
+        document listings, API results, etc. Pass the data you've already fetched,
+        and fetcharoo will tell you what's new, changed, or removed since last time.
+
+        Args:
+            source_key: A name for this data source (e.g., "diabetes-trials-recruiting").
+            records: List of record dicts to snapshot.
+            record_id_field: Dot-notation path to the unique ID in each record
+                           (e.g., "protocolSection.identificationModule.nctId" for clinical trials,
+                            or "id" for simpler records).
+        """
+        diff = snapshot_data(
+            store=_snapshot_store,
+            source_key=source_key,
+            records=records,
+            record_id_field=record_id_field,
+        )
+        return json.dumps({
+            "source_key": diff.source_key,
+            "has_changes": diff.has_changes,
+            "summary": {
+                "new": len(diff.new),
+                "changed": len(diff.changed),
+                "removed": len(diff.removed),
+                "unchanged": len(diff.unchanged),
+            },
+            "new_records": [{"id": r.record_id, "data": r.data} for r in diff.new],
+            "changed_records": [{"id": r.record_id, "data": r.data} for r in diff.changed],
+            "removed_records": [{"id": r.record_id} for r in diff.removed],
+        }, indent=2)
+
+    @mcp.tool()
+    def snapshot_query(
+        source_key: str,
+    ) -> str:
+        """
+        Get all current records for a monitored data source.
+
+        Returns the latest snapshot of all active records.
+
+        Args:
+            source_key: The data source name (e.g., "diabetes-trials-recruiting").
+        """
+        records = _snapshot_store.get_current_records(source_key)
+        return json.dumps({
+            "source_key": source_key,
+            "record_count": len(records),
+            "records": records,
+        }, indent=2)
+
+    @mcp.tool()
+    def snapshot_history(
+        source_key: Optional[str] = None,
+    ) -> str:
+        """
+        View the history of snapshot runs for a data source.
+
+        Shows when each snapshot was taken and what changed.
+
+        Args:
+            source_key: Filter by source name. If None, shows all sources.
+        """
+        history = _snapshot_store.get_snapshot_history(source_key)
+        return json.dumps({
+            "runs": history,
+        }, indent=2)
+
+    @mcp.tool()
+    def snapshot_sources() -> str:
+        """
+        List all data sources being monitored via snapshots.
+
+        Shows each source with its record count and last update time.
+        """
+        sources = _snapshot_store.list_sources()
+        return json.dumps({
+            "sources": sources,
+        }, indent=2)
+
+    @mcp.tool()
+    def snapshot_search(
+        query: str,
+        source_key: Optional[str] = None,
+    ) -> str:
+        """
+        Search across all snapshot records by content.
+
+        Args:
+            query: Search string to match against record data.
+            source_key: Optionally limit search to a specific source.
+        """
+        results = _snapshot_store.search_records(query, source_key)
+        return json.dumps({
+            "query": query,
+            "results_count": len(results),
+            "results": results,
+        }, indent=2)
+
     return mcp
 
 
diff --git a/fetcharoo/presets/__init__.py b/fetcharoo/presets/__init__.py
new file mode 100644
index 0000000..4fe4613
--- /dev/null
+++ b/fetcharoo/presets/__init__.py
@@ -0,0 +1,9 @@
+"""
+Monitoring presets for common MCP servers.
+
+Each preset defines how to snapshot and diff a specific MCP server type.
+"""
+
+from fetcharoo.presets.clinical_trials import CLINICAL_TRIALS_PRESET
+
+__all__ = ["CLINICAL_TRIALS_PRESET"]
diff --git a/fetcharoo/presets/clinical_trials.py b/fetcharoo/presets/clinical_trials.py
new file mode 100644
index 0000000..fa630a4
--- /dev/null
+++ b/fetcharoo/presets/clinical_trials.py
@@ -0,0 +1,140 @@
+"""
+Preset configuration for monitoring ClinicalTrials.gov MCP servers.
+
+Works with common clinical trials MCP servers including:
+- cyanheads/clinicaltrialsgov-mcp-server
+- JackKuo666/ClinicalTrials-MCP-Server
+- Augmented-Nature/ClinicalTrials-MCP-Server
+- MALathon/trial-guide
+
+All of these wrap the ClinicalTrials.gov API v2, so the data model is consistent.
+
+Usage:
+    from fetcharoo.presets.clinical_trials import CLINICAL_TRIALS_PRESET
+    from fetcharoo.mcp_monitor import SnapshotStore, snapshot_data
+
+    # If you already have data from your MCP server:
+    store = SnapshotStore()
+    diff = snapshot_data(
+        store=store,
+        source_key="diabetes-recruiting",
+        records=studies,  # list of study dicts from your MCP server
+        record_id_field=CLINICAL_TRIALS_PRESET["record_id_field"],
+    )
+
+    # Or use the async MCP client to call the server directly:
+    from fetcharoo.mcp_monitor import snapshot_mcp_tool
+    diff = await snapshot_mcp_tool(
+        store=store,
+        server_command=CLINICAL_TRIALS_PRESET["server_command"],
+        tool_name="search_studies",
+        tool_params={"query.cond": "diabetes", "filter.overallStatus": "RECRUITING"},
+        record_id_field=CLINICAL_TRIALS_PRESET["record_id_field"],
+        results_field=CLINICAL_TRIALS_PRESET["results_field"],
+    )
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+# --- ClinicalTrials.gov API v2 field paths ---
+# These are the standard nested paths in the API v2 response.
+# All MCP servers wrapping this API use the same structure.
+
+# The unique identifier for each study
+NCTID_FIELD = "protocolSection.identificationModule.nctId"
+
+# Common fields for display/summary
+TITLE_FIELD = "protocolSection.identificationModule.officialTitle"
+BRIEF_TITLE_FIELD = "protocolSection.identificationModule.briefTitle"
+STATUS_FIELD = "protocolSection.statusModule.overallStatus"
+PHASE_FIELD = "protocolSection.designModule.phases"
+CONDITIONS_FIELD = "protocolSection.conditionsModule.conditions"
+INTERVENTIONS_FIELD = "protocolSection.armsInterventionsModule.interventions"
+SPONSOR_FIELD = "protocolSection.sponsorCollaboratorsModule.leadSponsor.name"
+ENROLLMENT_FIELD = "protocolSection.designModule.enrollmentInfo.count"
+START_DATE_FIELD = "protocolSection.statusModule.startDateStruct.date"
+LAST_UPDATE_FIELD = "protocolSection.statusModule.lastUpdatePostDateStruct.date"
+
+# Where results are nested in common MCP server responses
+# Different servers may nest results differently:
+COMMON_RESULTS_FIELDS = ["studies", "results", "data", "items"]
+
+
+@dataclass
+class ClinicalTrialsPreset:
+    """Configuration preset for clinical trials MCP monitoring."""
+    record_id_field: str = NCTID_FIELD
+    results_field: Optional[str] = "studies"
+    server_command: Optional[List[str]] = None
+
+    # Fields to extract for human-readable summaries
+    summary_fields: Dict[str, str] = field(default_factory=lambda: {
+        "nct_id": NCTID_FIELD,
+        "title": BRIEF_TITLE_FIELD,
+        "status": STATUS_FIELD,
+        "phase": PHASE_FIELD,
+        "conditions": CONDITIONS_FIELD,
+        "sponsor": SPONSOR_FIELD,
+        "enrollment": ENROLLMENT_FIELD,
+        "start_date": START_DATE_FIELD,
+        "last_update": LAST_UPDATE_FIELD,
+    })
+
+    def format_record_summary(self, record: Dict[str, Any]) -> str:
+        """Format a study record into a readable one-line summary."""
+        from fetcharoo.mcp_monitor import _extract_nested
+
+        nct_id = _extract_nested(record, self.record_id_field) or "Unknown"
+        title = _extract_nested(record, BRIEF_TITLE_FIELD) or "Untitled"
+        status = _extract_nested(record, STATUS_FIELD) or "Unknown"
+        phase = _extract_nested(record, PHASE_FIELD)
+        if isinstance(phase, list):
+            phase = ", ".join(str(p) for p in phase)
+        phase_str = f" [{phase}]" if phase else ""
+
+        return f"{nct_id}: {title} ({status}{phase_str})"
+
+    def format_diff_summary(self, diff) -> str:
+        """Format a SnapshotDiff into a clinical-trials-specific summary."""
+        lines = [f"Clinical Trials Monitor — {diff.source_key}"]
+        lines.append(f"  {diff.summary}")
+        lines.append("")
+
+        if diff.new:
+            lines.append("  New trials:")
+            for rec in diff.new:
+                lines.append(f"    + {self.format_record_summary(rec.data)}")
+
+        if diff.changed:
+            lines.append("  Updated trials:")
+            for rec in diff.changed:
+                lines.append(f"    ~ {self.format_record_summary(rec.data)}")
+
+        if diff.removed:
+            lines.append("  Removed trials:")
+            for rec in diff.removed:
+                lines.append(f"    - {self.format_record_summary(rec.data)}")
+
+        if not diff.has_changes:
+            lines.append("  No changes since last check.")
+
+        return "\n".join(lines)
+
+
+# Default preset instance
+CLINICAL_TRIALS_PRESET = ClinicalTrialsPreset()
+
+
+# --- Alternative record_id_field values for different MCP server formats ---
+# Some servers flatten the structure. Try these if the default doesn't work:
+
+ALTERNATIVE_ID_FIELDS = [
+    "protocolSection.identificationModule.nctId",  # Standard API v2 nested
+    "nctId",                                         # Flattened by some servers
+    "NCTId",                                         # Alternative casing
+    "id",                                            # Generic
+    "study_id",                                      # Some custom servers
+    "trialId",                                       # Another variant
+]
diff --git a/tests/test_mcp_monitor.py b/tests/test_mcp_monitor.py
new file mode 100644
index 0000000..0cec32b
--- /dev/null
+++ b/tests/test_mcp_monitor.py
@@ -0,0 +1,290 @@
+"""Tests for MCP source monitoring and snapshot diffing."""
+
+import json
+import os
+import tempfile
+import unittest
+
+from fetcharoo.mcp_monitor import (
+    SnapshotStore,
+    SnapshotDiff,
+    SnapshotRecord,
+    snapshot_data,
+    _extract_nested,
+    _hash_json,
+)
+
+
+class TestHelpers(unittest.TestCase):
+
+    def test_extract_nested_simple(self):
+        self.assertEqual(_extract_nested({"a": 1}, "a"), 1)
+
+    def test_extract_nested_deep(self):
+        data = {"a": {"b": {"c": 42}}}
+        self.assertEqual(_extract_nested(data, "a.b.c"), 42)
+
+    def test_extract_nested_missing(self):
+        self.assertIsNone(_extract_nested({"a": 1}, "b"))
+
+    def test_extract_nested_list_index(self):
+        data = {"items": [{"id": "first"}, {"id": "second"}]}
+        self.assertEqual(_extract_nested(data, "items.0.id"), "first")
+        self.assertEqual(_extract_nested(data, "items.1.id"), "second")
+
+    def test_extract_nested_none_safe(self):
+        self.assertIsNone(_extract_nested(None, "a.b"))
+
+    def test_hash_json_deterministic(self):
+        h1 = _hash_json({"a": 1, "b": 2})
+        h2 = _hash_json({"b": 2, "a": 1})  # different order, same content
+        self.assertEqual(h1, h2)
+
+    def test_hash_json_different(self):
+        h1 = _hash_json({"a": 1})
+        h2 = _hash_json({"a": 2})
+        self.assertNotEqual(h1, h2)
+
+
+class TestSnapshotStore(unittest.TestCase):
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        self.store = SnapshotStore(db_path=self.tmp)
+
+    def tearDown(self):
+        self.store.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    def test_empty_store(self):
+        sources = self.store.list_sources()
+        self.assertEqual(len(sources), 0)
+
+    def test_first_snapshot_all_new(self):
+        records = [
+            {"id": "NCT001", "title": "Trial A", "status": "RECRUITING"},
+            {"id": "NCT002", "title": "Trial B", "status": "ACTIVE"},
+        ]
+        diff = self.store.take_snapshot("trials:diabetes", records, "id")
+
+        self.assertEqual(len(diff.new), 2)
+        self.assertEqual(len(diff.unchanged), 0)
+        self.assertEqual(len(diff.removed), 0)
+        self.assertTrue(diff.has_changes)
+
+    def test_second_snapshot_unchanged(self):
+        records = [{"id": "NCT001", "title": "Trial A"}]
+        self.store.take_snapshot("test", records, "id")
+
+        diff = self.store.take_snapshot("test", records, "id")
+        self.assertEqual(len(diff.new), 0)
+        self.assertEqual(len(diff.unchanged), 1)
+        self.assertEqual(len(diff.removed), 0)
+        self.assertFalse(diff.has_changes)
+
+    def test_snapshot_detects_new(self):
+        self.store.take_snapshot("test", [{"id": "A"}], "id")
+        diff = self.store.take_snapshot("test", [{"id": "A"}, {"id": "B"}], "id")
+
+        self.assertEqual(len(diff.new), 1)
+        self.assertEqual(diff.new[0].record_id, "B")
+        self.assertEqual(len(diff.unchanged), 1)
+
+    def test_snapshot_detects_removed(self):
+        self.store.take_snapshot("test", [{"id": "A"}, {"id": "B"}], "id")
+        diff = self.store.take_snapshot("test", [{"id": "A"}], "id")
+
+        self.assertEqual(len(diff.removed), 1)
+        self.assertEqual(diff.removed[0].record_id, "B")
+
+    def test_snapshot_detects_changed(self):
+        self.store.take_snapshot("test", [{"id": "A", "val": 1}], "id")
+        diff = self.store.take_snapshot("test", [{"id": "A", "val": 2}], "id")
+
+        self.assertEqual(len(diff.changed), 1)
+        self.assertEqual(diff.changed[0].record_id, "A")
+
+    def test_snapshot_mixed_changes(self):
+        self.store.take_snapshot("test", [
+            {"id": "keep", "v": 1},
+            {"id": "change", "v": 1},
+            {"id": "remove", "v": 1},
+        ], "id")
+
+        diff = self.store.take_snapshot("test", [
+            {"id": "keep", "v": 1},     # unchanged
+            {"id": "change", "v": 2},    # changed
+            {"id": "added", "v": 1},     # new
+        ], "id")
+
+        self.assertEqual(len(diff.unchanged), 1)
+        self.assertEqual(len(diff.changed), 1)
+        self.assertEqual(len(diff.new), 1)
+        self.assertEqual(len(diff.removed), 1)
+
+    def test_nested_record_id(self):
+        records = [
+            {"protocol": {"id_module": {"nctId": "NCT001"}}, "title": "A"},
+            {"protocol": {"id_module": {"nctId": "NCT002"}}, "title": "B"},
+        ]
+        diff = self.store.take_snapshot("trials", records, "protocol.id_module.nctId")
+
+        self.assertEqual(len(diff.new), 2)
+        ids = {r.record_id for r in diff.new}
+        self.assertEqual(ids, {"NCT001", "NCT002"})
+
+    def test_get_current_records(self):
+        records = [{"id": "A", "data": 1}, {"id": "B", "data": 2}]
+        self.store.take_snapshot("test", records, "id")
+
+        current = self.store.get_current_records("test")
+        self.assertEqual(len(current), 2)
+
+    def test_get_record(self):
+        self.store.take_snapshot("test", [{"id": "A", "val": 42}], "id")
+        rec = self.store.get_record("test", "A")
+        self.assertIsNotNone(rec)
+        self.assertEqual(rec['data']['val'], 42)
+
+    def test_get_record_not_found(self):
+        rec = self.store.get_record("test", "nonexistent")
+        self.assertIsNone(rec)
+
+    def test_snapshot_history(self):
+        self.store.take_snapshot("test", [{"id": "A"}], "id")
+        self.store.take_snapshot("test", [{"id": "A"}, {"id": "B"}], "id")
+
+        history = self.store.get_snapshot_history("test")
+        self.assertEqual(len(history), 2)
+
+    def test_list_sources(self):
+        self.store.take_snapshot("source_a", [{"id": "1"}], "id")
+        self.store.take_snapshot("source_b", [{"id": "2"}, {"id": "3"}], "id")
+
+        sources = self.store.list_sources()
+        self.assertEqual(len(sources), 2)
+
+    def test_search_records(self):
+        self.store.take_snapshot("trials", [
+            {"id": "NCT001", "condition": "diabetes"},
+            {"id": "NCT002", "condition": "cancer"},
+        ], "id")
+
+        results = self.store.search_records("diabetes")
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['record_id'], "NCT001")
+
+    def test_export_json(self):
+        self.store.take_snapshot("test", [{"id": "A"}], "id")
+        exported = self.store.export_json("test")
+        data = json.loads(exported)
+        self.assertEqual(len(data), 1)
+
+    def test_summary_string(self):
+        diff = SnapshotDiff(
+            source_key="test",
+            new=[SnapshotRecord("1", "h1")],
+            removed=[SnapshotRecord("2", "h2")],
+        )
+        self.assertIn("new=1", diff.summary)
+        self.assertIn("removed=1", diff.summary)
+
+
+class TestSnapshotData(unittest.TestCase):
+    """Test the synchronous snapshot_data convenience function."""
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        self.store = SnapshotStore(db_path=self.tmp)
+
+    def tearDown(self):
+        self.store.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    def test_snapshot_data_basic(self):
+        records = [{"id": "1", "name": "alpha"}, {"id": "2", "name": "beta"}]
+        diff = snapshot_data(self.store, "test", records, "id")
+        self.assertEqual(len(diff.new), 2)
+        self.assertTrue(diff.has_changes)
+
+    def test_snapshot_data_idempotent(self):
+        records = [{"id": "1", "name": "alpha"}]
+        snapshot_data(self.store, "test", records, "id")
+        diff = snapshot_data(self.store, "test", records, "id")
+        self.assertFalse(diff.has_changes)
+
+
+class TestToolCache(unittest.TestCase):
+    """Test the MCP proxy tool cache."""
+
+    def setUp(self):
+        self.tmp = tempfile.mktemp(suffix='.db')
+        from fetcharoo.mcp_proxy import ToolCache
+        self.cache = ToolCache(db_path=self.tmp)
+
+    def tearDown(self):
+        self.cache.close()
+        if os.path.exists(self.tmp):
+            os.unlink(self.tmp)
+
+    def test_cache_miss(self):
+        result = self.cache.get("tool", {"q": "test"})
+        self.assertIsNone(result)
+
+    def test_cache_put_and_get(self):
+        self.cache.put("tool", {"q": "test"}, "result text")
+        result = self.cache.get("tool", {"q": "test"}, ttl=3600)
+        self.assertEqual(result, "result text")
+
+    def test_cache_ttl_zero_always_miss(self):
+        self.cache.put("tool", {"q": "test"}, "result text")
+        result = self.cache.get("tool", {"q": "test"}, ttl=0)
+        self.assertIsNone(result)
+
+    def test_cache_detects_change(self):
+        self.cache.put("tool", {}, "version 1")
+        changed = self.cache.put("tool", {}, "version 2")
+        self.assertTrue(changed)
+
+    def test_cache_no_change(self):
+        self.cache.put("tool", {}, "same")
+        changed = self.cache.put("tool", {}, "same")
+        self.assertFalse(changed)
+
+    def test_cache_invalidate(self):
+        self.cache.put("tool_a", {}, "a")
+        self.cache.put("tool_b", {}, "b")
+        count = self.cache.invalidate("tool_a")
+        self.assertEqual(count, 1)
+        self.assertIsNone(self.cache.get("tool_a", {}, ttl=3600))
+        self.assertIsNotNone(self.cache.get("tool_b", {}, ttl=3600))
+
+    def test_cache_invalidate_all(self):
+        self.cache.put("tool_a", {}, "a")
+        self.cache.put("tool_b", {}, "b")
+        count = self.cache.invalidate()
+        self.assertEqual(count, 2)
+
+    def test_cache_history(self):
+        self.cache.put("tool", {}, "v1")
+        self.cache.put("tool", {}, "v2")
+        history = self.cache.get_history()
+        self.assertEqual(len(history), 2)
+
+    def test_get_all_entries(self):
+        self.cache.put("tool_a", {"x": 1}, "a")
+        self.cache.put("tool_b", {"y": 2}, "b")
+        entries = self.cache.get_all_entries()
+        self.assertEqual(len(entries), 2)
+
+    def test_get_all_entries_filtered(self):
+        self.cache.put("tool_a", {}, "a")
+        self.cache.put("tool_b", {}, "b")
+        entries = self.cache.get_all_entries("tool_a")
+        self.assertEqual(len(entries), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 385ac3504a6802f123f4ca2f2e7c7e4f2af2b984 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 7 Apr 2026 23:49:31 +0000
Subject: [PATCH 6/6] Unify MCP server and proxy into single mode

Merge the separate MCP server and proxy into one server:
  - fetcharoo mcp serve              (standalone)
  - fetcharoo mcp serve --upstream X (with caching proxy)

When --upstream is provided, upstream_call, upstream_refresh,
cache_status, and cache_clear tools are added alongside the
existing PDF + snapshot tools. No separate proxy command needed.

https://claude.ai/code/session_01EFk8Enntgip8z3nqk1ppkA
---
 fetcharoo/cli.py        |  33 ++--
 fetcharoo/mcp_server.py | 408 ++++++++++++++++++----------------------
 2 files changed, 199 insertions(+), 242 deletions(-)

diff --git a/fetcharoo/cli.py b/fetcharoo/cli.py
index 73d80d0..db13ae3 100644
--- a/fetcharoo/cli.py
+++ b/fetcharoo/cli.py
@@ -18,7 +18,7 @@
 from fetcharoo.filtering import FilterConfig
 
 # Subcommands that the CLI recognizes
-SUBCOMMANDS = {'diff', 'watch', 'catalog', 'schemas', 'mcp', 'proxy', 'monitor'}
+SUBCOMMANDS = {'diff', 'watch', 'catalog', 'schemas', 'mcp', 'monitor'}
 
 
 def configure_logging(quiet: int, verbose: int) -> None:
@@ -517,28 +517,21 @@ def _handle_schemas(argv: list) -> int:
 def _handle_mcp(argv: list) -> int:
     """Handle the 'mcp' subcommand."""
     if not argv or argv[0] != 'serve':
-        print("Usage: fetcharoo mcp serve")
+        print("Usage: fetcharoo mcp serve [--upstream CMD] [--ttl SECONDS]")
         return 1
 
-    from fetcharoo.mcp_server import main as mcp_main
-    mcp_main()
-    return 0
-
-
-def _handle_proxy(argv: list) -> int:
-    """Handle the 'proxy' subcommand — MCP caching proxy."""
-    parser = argparse.ArgumentParser(
-        prog='fetcharoo proxy',
-        description='Start a caching MCP proxy that wraps any upstream MCP server.',
-    )
-    parser.add_argument('--server', type=str, required=True, help='command to start upstream MCP server (e.g., "npx trial-guide")')
-    parser.add_argument('--ttl', type=float, default=3600, help='cache TTL in seconds (default: 3600, 0=no cache)')
-    parser.add_argument('--cache-db', type=str, help='path to cache database')
+    parser = argparse.ArgumentParser(prog='fetcharoo mcp serve')
+    parser.add_argument('--upstream', type=str, default=None,
+                        help='upstream MCP server command to proxy (e.g., "npx trial-guide")')
+    parser.add_argument('--ttl', type=float, default=3600,
+                        help='cache TTL for proxied calls in seconds (default: 3600)')
+    parser.add_argument('--cache-db', type=str, default=None,
+                        help='path to cache database')
 
-    args = parser.parse_args(argv)
+    args = parser.parse_args(argv[1:])  # skip 'serve'
 
-    from fetcharoo.mcp_proxy import run_proxy
-    run_proxy(args.server, ttl=args.ttl, cache_db_path=args.cache_db)
+    from fetcharoo.mcp_server import main as mcp_main
+    mcp_main(upstream=args.upstream, ttl=args.ttl, cache_db=args.cache_db)
     return 0
 
 
@@ -690,8 +683,6 @@ def main(argv: Optional[list] = None) -> int:
                 return _handle_schemas(rest)
             elif command == 'mcp':
                 return _handle_mcp(rest)
-            elif command == 'proxy':
-                return _handle_proxy(rest)
             elif command == 'monitor':
                 return _handle_monitor(rest)
         except KeyboardInterrupt:
diff --git a/fetcharoo/mcp_server.py b/fetcharoo/mcp_server.py
index b85f15c..4185e1d 100644
--- a/fetcharoo/mcp_server.py
+++ b/fetcharoo/mcp_server.py
@@ -1,13 +1,17 @@
 """
-MCP (Model Context Protocol) server for fetcharoo.
+MCP server for fetcharoo.
 
-Exposes fetcharoo's stateful capabilities as MCP tools, enabling AI agents
-to discover, download, and track PDF documents persistently.
+A single MCP server that provides:
+  1. PDF discovery, download, and tracking tools (always available)
+  2. Snapshot monitoring for any data source (always available)
+  3. Caching proxy for an upstream MCP server (when --upstream is provided)
 
 Usage:
+    # Standalone — PDF tools + snapshot monitoring
     fetcharoo mcp serve
-    # or directly:
-    python -m fetcharoo.mcp_server
+
+    # With upstream proxy — all of the above + cached proxy to another MCP server
+    fetcharoo mcp serve --upstream "npx trial-guide" --ttl 3600
 """
 
 import json
@@ -28,15 +32,21 @@ def _check_mcp_available():
         return False
 
 
-def create_server():
+def create_server(
+    upstream_command: Optional[str] = None,
+    ttl: float = 3600,
+    cache_db_path: Optional[str] = None,
+):
     """
-    Create and configure the fetcharoo MCP server.
+    Create the unified fetcharoo MCP server.
+
+    Args:
+        upstream_command: If provided, also proxy this upstream MCP server with caching.
+        ttl: Cache TTL in seconds for proxied calls (default: 1 hour).
+        cache_db_path: Path to cache/snapshot database.
 
     Returns:
         A configured FastMCP server instance.
-
-    Raises:
-        ImportError: If the mcp package is not installed.
     """
     try:
         from mcp.server.fastmcp import FastMCP
@@ -46,18 +56,23 @@ def create_server():
             "Install it with: pip install 'fetcharoo[mcp]' or pip install mcp"
         )
 
-    from fetcharoo.catalog import DocumentCatalog, DiffResult
+    from fetcharoo.catalog import DocumentCatalog
     from fetcharoo.fetcharoo import find_pdfs_from_webpage, download_pdfs_from_webpage
     from fetcharoo.filtering import FilterConfig
-    from fetcharoo.watcher import diff_once
+    from fetcharoo.mcp_monitor import SnapshotStore, snapshot_data
+    from fetcharoo.mcp_proxy import ToolCache
+
+    desc = "PDF discovery, document tracking, and snapshot monitoring"
+    if upstream_command:
+        desc += f" | caching proxy for: {upstream_command}"
 
-    mcp = FastMCP(
-        "fetcharoo",
-        description="PDF document discovery, download, and tracking from websites",
-    )
+    mcp = FastMCP("fetcharoo", description=desc)
 
-    # Shared catalog instance
     _catalog = DocumentCatalog()
+    _snapshot_store = SnapshotStore()
+    _tool_cache = ToolCache(db_path=cache_db_path) if upstream_command else None
+
+    # ===== PDF tools (always available) =====
 
     @mcp.tool()
     def discover_pdfs(
@@ -69,9 +84,6 @@ def discover_pdfs(
         """
         Discover all PDF documents available on a webpage.
 
-        Crawls the given URL (optionally following links to the specified depth)
-        and returns a structured list of all PDF URLs found.
-
         Args:
             url: The webpage URL to search for PDFs.
             recursion_depth: How many levels of links to follow (0-5).
@@ -79,11 +91,8 @@ def discover_pdfs(
             exclude_patterns: Filename patterns to exclude (e.g., ['*draft*']).
         """
         pdf_urls = find_pdfs_from_webpage(
-            url,
-            recursion_depth=min(recursion_depth, 5),
+            url, recursion_depth=min(recursion_depth, 5),
         )
-
-        # Apply filtering if patterns provided
         if include_patterns or exclude_patterns:
             from fetcharoo.filtering import should_download_pdf
             config = FilterConfig(
@@ -92,14 +101,11 @@ def discover_pdfs(
             )
             pdf_urls = [u for u in pdf_urls if should_download_pdf(u, filter_config=config)]
 
-        # Record discoveries in catalog
         for pdf_url in pdf_urls:
             _catalog.record_discovery(pdf_url, source_page=url)
 
         return json.dumps({
-            "source_url": url,
-            "count": len(pdf_urls),
-            "pdfs": pdf_urls,
+            "source_url": url, "count": len(pdf_urls), "pdfs": pdf_urls,
         }, indent=2)
 
     @mcp.tool()
@@ -111,8 +117,7 @@ def download_pdfs(
         output_name: Optional[str] = None,
     ) -> str:
         """
-        Download PDF documents from a webpage with fetcharoo's full reliability
-        (retry logic, rate limiting, deduplication, security hardening).
+        Download PDF documents from a webpage with full reliability.
 
         Args:
             url: The webpage URL to download PDFs from.
@@ -122,13 +127,10 @@ def download_pdfs(
             output_name: Custom filename for merged output.
         """
         result = download_pdfs_from_webpage(
-            url,
-            recursion_depth=min(recursion_depth, 5),
+            url, recursion_depth=min(recursion_depth, 5),
             mode='merge' if merge else 'separate',
-            write_dir=output_dir,
-            output_name=output_name,
+            write_dir=output_dir, output_name=output_name,
         )
-
         return json.dumps({
             "success": result.success,
             "downloaded_count": result.downloaded_count,
@@ -138,158 +140,76 @@ def download_pdfs(
             "errors": result.errors,
         }, indent=2)
 
-    @mcp.tool()
-    def catalog_query(
-        source_url: Optional[str] = None,
-    ) -> str:
-        """
-        Query the persistent document catalog.
+    # ===== Catalog tools (always available) =====
 
-        Shows all documents fetcharoo has ever seen, with metadata including
-        when they were first/last seen, content hashes, and file sizes.
-        This is persistent memory across sessions.
-
-        Args:
-            source_url: If provided, only show documents from this source page.
-        """
+    @mcp.tool()
+    def catalog_query(source_url: Optional[str] = None) -> str:
+        """Query the persistent document catalog. Shows all tracked documents."""
         docs = _catalog.get_active_documents(source_page=source_url)
         return json.dumps({
             "total_documents": len(docs),
             "documents": [
-                {
-                    "url": d.url,
-                    "filename": d.filename,
-                    "size_bytes": d.size_bytes,
-                    "first_seen": d.first_seen,
-                    "last_seen": d.last_seen,
-                    "last_changed": d.last_changed,
-                    "status": d.status,
-                    "metadata": d.metadata,
-                }
+                {"url": d.url, "filename": d.filename, "size_bytes": d.size_bytes,
+                 "first_seen": d.first_seen, "last_seen": d.last_seen,
+                 "status": d.status, "metadata": d.metadata}
                 for d in docs
             ],
         }, indent=2)
 
     @mcp.tool()
-    def catalog_diff(
-        url: str,
-        recursion_depth: int = 0,
-    ) -> str:
-        """
-        Check what's changed since the last time fetcharoo looked at a URL.
-
-        Compares the current state of PDFs on a webpage against what's stored
-        in the catalog. Reports new, removed, and unchanged documents.
-
-        Args:
-            url: The webpage URL to check for changes.
-            recursion_depth: How many levels of links to follow (0-5).
-        """
-        current_urls = find_pdfs_from_webpage(
-            url,
-            recursion_depth=min(recursion_depth, 5),
-        )
-
+    def catalog_diff(url: str, recursion_depth: int = 0) -> str:
+        """Check what PDFs have changed since last check on a URL."""
+        current_urls = find_pdfs_from_webpage(url, recursion_depth=min(recursion_depth, 5))
         diff = _catalog.diff(current_urls)
-
-        # Update catalog
         for doc in diff.new:
             _catalog.record_discovery(doc.url, source_page=url)
         for doc in diff.removed:
             _catalog.mark_removed(doc.url)
         _catalog.record_run(url, diff)
-
         return json.dumps({
             "source_url": url,
-            "summary": {
-                "new": len(diff.new),
-                "changed": len(diff.changed),
-                "removed": len(diff.removed),
-                "unchanged": len(diff.unchanged),
-            },
+            "summary": {"new": len(diff.new), "changed": len(diff.changed),
+                        "removed": len(diff.removed), "unchanged": len(diff.unchanged)},
             "new_documents": [d.url for d in diff.new],
             "removed_documents": [d.url for d in diff.removed],
-            "unchanged_documents": [d.url for d in diff.unchanged],
         }, indent=2)
 
     @mcp.tool()
-    def catalog_search(
-        query: str,
-    ) -> str:
-        """
-        Search across all tracked documents by URL or filename substring.
-
-        Args:
-            query: Search string to match against document URLs and filenames.
-        """
+    def catalog_search(query: str) -> str:
+        """Search tracked documents by URL or filename."""
         docs = _catalog.search(query)
         return json.dumps({
-            "query": query,
-            "results_count": len(docs),
-            "results": [
-                {
-                    "url": d.url,
-                    "filename": d.filename,
-                    "status": d.status,
-                    "first_seen": d.first_seen,
-                    "last_seen": d.last_seen,
-                }
-                for d in docs
-            ],
+            "query": query, "results_count": len(docs),
+            "results": [{"url": d.url, "filename": d.filename, "status": d.status,
+                         "first_seen": d.first_seen, "last_seen": d.last_seen}
+                        for d in docs],
         }, indent=2)
 
     @mcp.tool()
-    def get_document_metadata(
-        url: str,
-    ) -> str:
-        """
-        Get detailed information about a specific tracked document.
-
-        Args:
-            url: The URL of the document to look up.
-        """
+    def get_document_metadata(url: str) -> str:
+        """Get detailed info about a tracked document."""
         doc = _catalog.get_document(url)
         if doc is None:
             return json.dumps({"error": f"Document not found: {url}"})
-
         return json.dumps({
-            "url": doc.url,
-            "filename": doc.filename,
-            "content_hash": doc.content_hash,
-            "size_bytes": doc.size_bytes,
-            "first_seen": doc.first_seen,
-            "last_seen": doc.last_seen,
-            "last_changed": doc.last_changed,
-            "status": doc.status,
-            "source_page": doc.source_page,
-            "metadata": doc.metadata,
+            "url": doc.url, "filename": doc.filename,
+            "content_hash": doc.content_hash, "size_bytes": doc.size_bytes,
+            "first_seen": doc.first_seen, "last_seen": doc.last_seen,
+            "last_changed": doc.last_changed, "status": doc.status,
+            "source_page": doc.source_page, "metadata": doc.metadata,
         }, indent=2)
 
     @mcp.tool()
     def find_duplicate_documents() -> str:
-        """
-        Find documents that have identical content but different URLs.
-
-        Uses content hashing to detect when the same PDF exists at multiple URLs.
-        """
+        """Find documents with identical content at different URLs."""
         duplicates = _catalog.find_duplicates()
-        result = {}
-        for hash_val, docs in duplicates.items():
-            result[hash_val] = [d.url for d in docs]
+        result = {h: [d.url for d in docs] for h, docs in duplicates.items()}
+        return json.dumps({"duplicate_groups": len(result), "duplicates": result}, indent=2)
 
-        return json.dumps({
-            "duplicate_groups": len(result),
-            "duplicates": result,
-        }, indent=2)
-
-    # --- Snapshot monitoring tools ---
-
-    from fetcharoo.mcp_monitor import SnapshotStore, snapshot_data
-
-    _snapshot_store = SnapshotStore()
+    # ===== Snapshot monitoring tools (always available) =====
 
     @mcp.tool()
-    def snapshot_monitor(
+    def snapshot(
         source_key: str,
         records: list,
         record_id_field: str = "id",
@@ -297,108 +217,154 @@ def snapshot_monitor(
         """
         Snapshot a list of records and diff against the previous snapshot.
 
-        Use this to monitor ANY data source for changes over time — clinical trials,
-        document listings, API results, etc. Pass the data you've already fetched,
-        and fetcharoo will tell you what's new, changed, or removed since last time.
+        Use this to monitor ANY data source for changes over time.
+        Pass data you've already fetched from any tool or API, and get back
+        what's new, changed, or removed since last time.
 
         Args:
-            source_key: A name for this data source (e.g., "diabetes-trials-recruiting").
+            source_key: Name for this data source (e.g., "diabetes-trials").
             records: List of record dicts to snapshot.
-            record_id_field: Dot-notation path to the unique ID in each record
-                           (e.g., "protocolSection.identificationModule.nctId" for clinical trials,
-                            or "id" for simpler records).
+            record_id_field: Dot-notation path to the unique ID in each record.
         """
         diff = snapshot_data(
-            store=_snapshot_store,
-            source_key=source_key,
-            records=records,
-            record_id_field=record_id_field,
+            store=_snapshot_store, source_key=source_key,
+            records=records, record_id_field=record_id_field,
         )
         return json.dumps({
-            "source_key": diff.source_key,
-            "has_changes": diff.has_changes,
-            "summary": {
-                "new": len(diff.new),
-                "changed": len(diff.changed),
-                "removed": len(diff.removed),
-                "unchanged": len(diff.unchanged),
-            },
+            "source_key": diff.source_key, "has_changes": diff.has_changes,
+            "summary": {"new": len(diff.new), "changed": len(diff.changed),
+                        "removed": len(diff.removed), "unchanged": len(diff.unchanged)},
             "new_records": [{"id": r.record_id, "data": r.data} for r in diff.new],
             "changed_records": [{"id": r.record_id, "data": r.data} for r in diff.changed],
             "removed_records": [{"id": r.record_id} for r in diff.removed],
         }, indent=2)
 
     @mcp.tool()
-    def snapshot_query(
-        source_key: str,
-    ) -> str:
-        """
-        Get all current records for a monitored data source.
-
-        Returns the latest snapshot of all active records.
-
-        Args:
-            source_key: The data source name (e.g., "diabetes-trials-recruiting").
-        """
+    def snapshot_query(source_key: str) -> str:
+        """Get all current records for a monitored data source."""
         records = _snapshot_store.get_current_records(source_key)
         return json.dumps({
-            "source_key": source_key,
-            "record_count": len(records),
-            "records": records,
-        }, indent=2)
-
-    @mcp.tool()
-    def snapshot_history(
-        source_key: Optional[str] = None,
-    ) -> str:
-        """
-        View the history of snapshot runs for a data source.
-
-        Shows when each snapshot was taken and what changed.
-
-        Args:
-            source_key: Filter by source name. If None, shows all sources.
-        """
-        history = _snapshot_store.get_snapshot_history(source_key)
-        return json.dumps({
-            "runs": history,
+            "source_key": source_key, "record_count": len(records), "records": records,
         }, indent=2)
 
     @mcp.tool()
     def snapshot_sources() -> str:
-        """
-        List all data sources being monitored via snapshots.
-
-        Shows each source with its record count and last update time.
-        """
+        """List all data sources being monitored."""
         sources = _snapshot_store.list_sources()
-        return json.dumps({
-            "sources": sources,
-        }, indent=2)
+        return json.dumps({"sources": sources}, indent=2)
 
     @mcp.tool()
-    def snapshot_search(
-        query: str,
-        source_key: Optional[str] = None,
-    ) -> str:
-        """
-        Search across all snapshot records by content.
-
-        Args:
-            query: Search string to match against record data.
-            source_key: Optionally limit search to a specific source.
-        """
+    def snapshot_search(query: str, source_key: Optional[str] = None) -> str:
+        """Search across all snapshot records by content."""
         results = _snapshot_store.search_records(query, source_key)
         return json.dumps({
-            "query": query,
-            "results_count": len(results),
-            "results": results,
+            "query": query, "results_count": len(results), "results": results,
         }, indent=2)
 
+    # ===== Upstream proxy tools (only when --upstream is provided) =====
+
+    if upstream_command and _tool_cache:
+        import asyncio
+        from mcp import ClientSession, StdioServerParameters
+        from mcp.client.stdio import stdio_client
+
+        parts = upstream_command.split()
+        upstream_params = StdioServerParameters(
+            command=parts[0],
+            args=parts[1:] if len(parts) > 1 else [],
+        )
+
+        async def _call_upstream(tool_name: str, arguments: dict) -> str:
+            async with stdio_client(upstream_params) as (read, write):
+                async with ClientSession(read, write) as session:
+                    await session.initialize()
+                    result = await session.call_tool(tool_name, arguments=arguments)
+                    text_parts = []
+                    if hasattr(result, 'content'):
+                        for item in result.content:
+                            if hasattr(item, 'text'):
+                                text_parts.append(item.text)
+                    return "\n".join(text_parts)
+
+        def _call_upstream_sync(tool_name: str, arguments: dict) -> str:
+            return asyncio.run(_call_upstream(tool_name, arguments))
+
+        @mcp.tool()
+        def upstream_call(
+            tool_name: str,
+            arguments: Optional[dict] = None,
+            bypass_cache: bool = False,
+        ) -> str:
+            """
+            Call a tool on the upstream MCP server through the cache.
+
+            First checks the cache. If the result is fresh (within TTL), returns
+            the cached version. Otherwise calls upstream, caches, and returns.
+
+            Args:
+                tool_name: Name of the upstream tool to call.
+                arguments: Arguments dict to pass to the tool.
+                bypass_cache: If True, skip cache and always call upstream.
+            """
+            if arguments is None:
+                arguments = {}
+
+            from fetcharoo.mcp_proxy import _cache_key
+
+            if not bypass_cache:
+                cached = _tool_cache.get(tool_name, arguments, ttl=ttl)
+                if cached is not None:
+                    key = _cache_key(tool_name, arguments)
+                    return json.dumps({
+                        "_source": "cache", "_cache_key": key, "result": cached,
+                    })
+
+            result_text = _call_upstream_sync(tool_name, arguments)
+            changed = _tool_cache.put(tool_name, arguments, result_text)
+            key = _cache_key(tool_name, arguments)
+            return json.dumps({
+                "_source": "upstream", "_cache_key": key,
+                "_changed_since_last": changed, "result": result_text,
+            }, indent=2)
+
+        @mcp.tool()
+        def upstream_refresh(tool_name: str, arguments: Optional[dict] = None) -> str:
+            """
+            Force-refresh a cached upstream tool call (bypass TTL).
+
+            Args:
+                tool_name: The upstream tool to refresh.
+                arguments: Arguments to pass.
+            """
+            if arguments is None:
+                arguments = {}
+            from fetcharoo.mcp_proxy import _cache_key
+            result_text = _call_upstream_sync(tool_name, arguments)
+            changed = _tool_cache.put(tool_name, arguments, result_text)
+            key = _cache_key(tool_name, arguments)
+            return json.dumps({
+                "cache_key": key, "changed_since_last": changed, "result": result_text,
+            }, indent=2)
+
+        @mcp.tool()
+        def cache_status() -> str:
+            """Show all cached upstream tool calls and their freshness."""
+            entries = _tool_cache.get_all_entries()
+            return json.dumps({
+                "upstream": upstream_command, "ttl_seconds": ttl,
+                "cache_entries": len(entries), "entries": entries,
+            }, indent=2)
+
+        @mcp.tool()
+        def cache_clear(tool_name: Optional[str] = None) -> str:
+            """Clear the upstream cache. Optionally filter by tool name."""
+            count = _tool_cache.invalidate(tool_name)
+            return json.dumps({"cleared": count, "tool_name": tool_name or "all"})
+
     return mcp
 
 
-def main():
+def main(upstream: Optional[str] = None, ttl: float = 3600, cache_db: Optional[str] = None):
     """Run the fetcharoo MCP server."""
     if not _check_mcp_available():
         print(
@@ -408,7 +374,7 @@ def main():
         )
         sys.exit(1)
 
-    server = create_server()
+    server = create_server(upstream_command=upstream, ttl=ttl, cache_db_path=cache_db)
     server.run()