From 12dab4c6ee3994b2a3d270f598069ceb168df4f4 Mon Sep 17 00:00:00 2001 From: Arham Khan Date: Thu, 18 Sep 2025 21:06:28 +0000 Subject: [PATCH 1/2] update datasketch commit to use rust_pyhash --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6018ec4..ff5d6f5 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,7 @@ 'numpy>=1.11', 'scipy>=1.0.0', 'redis>=2.10.0', - 'datasketch @ git+https://github.com/123epsilon/datasketch.git@060a32b4b4a2272d77480dd633a1bf770678ba49', - 'pybloomfiltermmap3==0.5.7', + 'datasketch @ git+https://github.com/123epsilon/datasketch.git@8f4b34f604e3d26369a50ab731b4948c0e04eb5a', 'tqdm>=4.60.0', ] ) \ No newline at end of file From 488f6e74f5f23afbcffcccb2563e068e1b7816e2 Mon Sep 17 00:00:00 2001 From: Arham Khan Date: Thu, 18 Sep 2025 21:14:55 +0000 Subject: [PATCH 2/2] update readme --- deduplication/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deduplication/README.md b/deduplication/README.md index 2ab2e77..0528317 100644 --- a/deduplication/README.md +++ b/deduplication/README.md @@ -1,4 +1,13 @@ # Install + +This package requires `rust` and `cargo`. The recommended way to install these dependencies is using [`rustup`](https://rustup.rs/): + +```bash +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +``` + +Then, to install the Python package, you can use pip: + ```bash git clone https://github.com/TPC-AI/data-general-text-code-web.git cd data-general-text-code-web/