From 0f438113e5d5e41817b8acb5ab172b69ecdbec6f Mon Sep 17 00:00:00 2001
From: shoredatalabs <ibrahim.shore@zoho.com>
Date: Wed, 13 May 2026 09:54:25 -0700
Subject: [PATCH 1/4] Add <your-name> to the README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 44e0723..d0de913 100644
--- a/README.md
+++ b/README.md
@@ -91,3 +91,4 @@ scripts/
 ```
 
 Each round's `data/` directory is generated locally and gitignored.
+This is <your-name>'s PR

From ded3b43b35873c1236081e8a1fb880a9979bd52c Mon Sep 17 00:00:00 2001
From: Ibrahim Shore <ibrahim.shore@zoho.com>
Date: Wed, 13 May 2026 11:32:54 -0700
Subject: [PATCH 2/4] Potential fix for pull request finding: remove placeholder line from README

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index d0de913..44e0723 100644
--- a/README.md
+++ b/README.md
@@ -91,4 +91,3 @@ scripts/
 ```
 
 Each round's `data/` directory is generated locally and gitignored.
-This is <your-name>'s PR

From d5fb559b5b898a8e2b1c0badb1150e2b07707cd0 Mon Sep 17 00:00:00 2001
From: shoredatalabs <ibrahim.shore@zoho.com>
Date: Wed, 13 May 2026 11:45:49 -0700
Subject: [PATCH 3/4] Improve histogram performance with NumPy bincount

---
 rounds/1_histogram/solution.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py
index dffbee5..9e5cd7f 100644
--- a/rounds/1_histogram/solution.py
+++ b/rounds/1_histogram/solution.py
@@ -5,10 +5,20 @@
 own faster implementation.
 """
 
+import numpy as np
+
 
 def compute_histogram(path: str) -> dict[bytes, int]:
     """Frequency of every 2-byte bigram in the file at ``path``."""
-    # TODO: remove this delegation and write your own implementation here.
-    from .baseline import compute_histogram as _baseline
+    with open(path, "rb") as f:
+        data = f.read()
+
+    arr = np.frombuffer(data, dtype=np.uint8)
+    # Encode each bigram as a uint16 index: high_byte * 256 + low_byte
+    indices = arr[:-1].astype(np.uint16) * 256 + arr[1:]
+    counts = np.bincount(indices, minlength=65536)
 
-    return _baseline(path)
+    result: dict[bytes, int] = {}
+    for idx in np.nonzero(counts)[0]:
+        result[bytes([idx >> 8, idx & 0xFF])] = int(counts[idx])
+    return result

From 6a20f62e0376fbff8d49b4c308bfd85e79e012ec Mon Sep 17 00:00:00 2001
From: shoredatalabs <ibrahim.shore@zoho.com>
Date: Wed, 13 May 2026 12:23:00 -0700
Subject: [PATCH 4/4] Improve performance of corruption detection with NumPy

---
 rounds/2_corruption/solution.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/rounds/2_corruption/solution.py b/rounds/2_corruption/solution.py
index a5b752a..8934d23 100644
--- a/rounds/2_corruption/solution.py
+++ b/rounds/2_corruption/solution.py
@@ -5,10 +5,27 @@
 own faster implementation.
 """
 
-from .baseline import find_corruptions as _baseline
+import numpy as np
 
 
 def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
     """Return ``[(offset, length), ...]`` for every differing byte range."""
-    # TODO: remove this delegation and write your own implementation here.
-    return _baseline(ref_path, cor_path)
+    ref = np.fromfile(ref_path, dtype=np.uint8)
+    cor = np.fromfile(cor_path, dtype=np.uint8)
+
+    if len(ref) != len(cor):
+        raise ValueError("reference and corrupted files differ in length")
+
+    # Single vectorised comparison — runs entirely in C.
+    diff_indices = np.where(ref != cor)[0]
+
+    if len(diff_indices) == 0:
+        return []
+
+    # Find the boundaries between consecutive runs.
+    # A new run starts wherever the gap between adjacent indices exceeds 1.
+    gaps = np.where(np.diff(diff_indices) > 1)[0]
+    starts = diff_indices[np.concatenate(([0], gaps + 1))]
+    ends = diff_indices[np.concatenate((gaps, [len(diff_indices) - 1]))]
+
+    return [(int(s), int(e - s + 1)) for s, e in zip(starts, ends)]