Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ scripts/
```

Each round's `data/` directory is generated locally and gitignored.
This is RossK1's PR
19 changes: 10 additions & 9 deletions rounds/1_histogram/solution.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Your Round 1 solution — byte-pair histogram.
"""Your Round 1 solution — byte-pair histogram."""

**Edit this file.** It currently delegates to ``baseline.py`` so everything
passes out of the box. Replace the body of ``compute_histogram`` with your
own faster implementation.
"""
import numpy as np
import mmap


def compute_histogram(path: str) -> dict[bytes, int]:
    """Frequency of every 2-byte bigram in the file at ``path``.

    Memory-maps the file, packs each adjacent byte pair into one 16-bit
    key (``first << 8 | second``), and counts all 65536 possible keys in
    a single ``np.bincount`` pass.

    Returns:
        Mapping from each 2-byte sequence that occurs to its count.
        Pairs that never occur are omitted; an empty or 1-byte file
        yields ``{}``.
    """
    with open(path, "rb") as f:
        try:
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                # Copy while the mmap is still open so numpy never holds
                # an exported pointer into a closed mapping.
                data = np.frombuffer(mm, dtype=np.uint8).copy()
        except ValueError:
            # mmap refuses zero-length files; an empty file has no bigrams.
            return {}
    # Each bigram becomes one uint16 key: (first byte << 8) | second byte.
    keys = data[:-1].astype(np.uint16) << 8 | data[1:].astype(np.uint16)
    counts = np.bincount(keys, minlength=65536)
    return {bytes([k >> 8, k & 0xFF]): int(counts[k]) for k in np.nonzero(counts)[0]}
34 changes: 25 additions & 9 deletions rounds/2_corruption/solution.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
"""Your Round 2 solution — corruption scanner.
from __future__ import annotations

**Edit this file.** It currently delegates to ``baseline.py`` so everything
passes out of the box. Replace the body of ``find_corruptions`` with your
own faster implementation.
"""

from .baseline import find_corruptions as _baseline
import numpy as np


def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
    """Return ``[(offset, length), ...]`` for every differing byte range.

    Compares the two files bytewise and coalesces consecutive differing
    positions into maximal runs, reported in ascending offset order.

    Raises:
        ValueError: if the two files are not the same length.
    """
    with open(ref_path, "rb") as f:
        ref = np.frombuffer(f.read(), dtype=np.uint8)
    with open(cor_path, "rb") as f:
        cor = np.frombuffer(f.read(), dtype=np.uint8)

    if len(ref) != len(cor):
        raise ValueError("reference and corrupted files differ in length")

    # Boolean mask of positions where the files disagree.
    mask = ref != cor
    if not mask.any():
        return []

    # Pad with a zero on each side so every run has both a rising (+1)
    # and a falling (-1) edge in the diff, even at the array ends.
    padded = np.empty(len(mask) + 2, dtype=np.int8)
    padded[0] = 0
    padded[1:-1] = mask.view(np.int8)
    padded[-1] = 0
    d = np.diff(padded)
    starts = np.where(d == 1)[0]   # first differing offset of each run
    ends = np.where(d == -1)[0]    # one past the last differing offset

    return [(int(s), int(e - s)) for s, e in zip(starts, ends)]
54 changes: 45 additions & 9 deletions rounds/3_dna/solution.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,53 @@
"""Your Round 3 solution — DNA sequence matcher.
"""Your Round 3 solution — DNA sequence matcher."""

**Edit this file.** It currently delegates to ``baseline.py`` so everything
passes out of the box. Replace the body of ``find_matches`` with your
own faster implementation.
"""

from .baseline import find_matches as _baseline
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor, as_completed
import mmap
import os


def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    Positions are offsets into the record's sequence with newlines
    removed; overlapping occurrences are all reported.

    Returns ``[(record_id, [positions...]), ...]`` in file order,
    omitting records without any match.
    """
    # mmap cannot map an empty file, and an empty file has no records.
    if os.path.getsize(fasta_path) == 0:
        return []

    # Read as bytes — no decode overhead, the pattern stays as bytes.
    with open(fasta_path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            data = bytes(mm)
    size = len(data)

    # Record boundaries at each '>' — assumes '>' appears only at the
    # start of a header line in well-formed input (TODO confirm against
    # the corpus; a '>' inside a description would split the record).
    offsets = [0]
    pos = data.find(b">", 1)
    while pos != -1:
        offsets.append(pos)
        pos = data.find(b">", pos + 1)
    offsets.append(size)

    def process_record(
        start: int, end: int, idx: int
    ) -> tuple[int, tuple[str, list[int]]] | None:
        # One record: header line, then sequence lines joined newline-free.
        chunk = data[start:end]
        lines = chunk.split(b"\n")
        record_id = lines[0][1:].rstrip().decode("ascii")
        sequence = b"".join(lines[1:])
        positions = []
        start_pos = 0
        # Advance by one past each hit so overlapping matches are found.
        while (hit := sequence.find(pattern, start_pos)) != -1:
            positions.append(hit)
            start_pos = hit + 1
        if positions:
            return (idx, (record_id, positions))
        return None

    max_workers = min(32, (os.cpu_count() or 1) * 2)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_record, offsets[i], offsets[i + 1], i)
            for i in range(len(offsets) - 1)
        ]
        results = [r for f in as_completed(futures) if (r := f.result()) is not None]

    # Restore file order — as_completed yields in completion order.
    results.sort(key=lambda x: x[0])
    return [r for _, r in results]
Loading