Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ scripts/
```

Each round's `data/` directory is generated locally and gitignored.
This is Tim Schilling's PR
22 changes: 19 additions & 3 deletions rounds/1_histogram/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,27 @@
passes out of the box. Replace the body of ``compute_histogram`` with your
own faster implementation.
"""
from collections import Counter
from struct import unpack


def get_biagrams(data):
    """Yield every overlapping 2-byte bigram of ``data``, in order.

    For ``b"ABCD"`` this yields ``b"AB"``, ``b"BC"``, ``b"CD"``.  Inputs
    shorter than two bytes yield nothing.

    Bug fixed: the previous version primed two values with ``next()``, so a
    0- or 1-byte input let ``StopIteration`` escape the generator, which
    PEP 479 converts to a ``RuntimeError``.
    """
    # Slicing a bytes object returns bytes, so data[i:i + 2] is already the
    # 2-byte bigram -- no struct.unpack round-trip through 1-byte chunks.
    for i in range(len(data) - 1):
        yield data[i:i + 2]


def compute_histogram(path: str) -> dict[bytes, int]:
    """Frequency of every 2-byte bigram in the file at ``path``.

    Slides a 2-byte window across the file's bytes: ``b"ABCD"`` contributes
    ``b"AB"``, ``b"BC"``, ``b"CD"``.  Files shorter than two bytes produce
    an empty mapping.

    Defect fixed: the diff residue left an early ``return _baseline(path)``
    before the real implementation, making it unreachable; that delegation
    is removed here.
    """
    # Read the whole file into memory once -- the window slicing below needs
    # random access to the byte buffer.
    with open(path, "rb") as f:
        data = f.read()

    # Counter is a dict subclass, so it satisfies the declared return type.
    # The generator avoids materialising a throwaway list of bigrams.
    return Counter(data[i:i + 2] for i in range(len(data) - 1))
74 changes: 71 additions & 3 deletions rounds/3_dna/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,82 @@
passes out of the box. Replace the body of ``find_matches`` with your
own faster implementation.
"""
import os
from concurrent.futures import ThreadPoolExecutor

from .baseline import find_matches as _baseline

def _find_record_matches(pattern, sequence):
positions: list[int] = []
start = 0
while True:
pos = sequence.find(pattern, start)
if pos == -1:
break
positions.append(pos)
start = pos + 1
return positions


def _search_chunk(fasta_path, chunk_start, chunk_end, pattern):
    """Search the FASTA records that *begin* inside ``[chunk_start, chunk_end)``.

    Reads the byte range, extends it forward to finish the last record that
    straddles ``chunk_end``, and discards the partial record at the front
    (that record is owned by whichever chunk contains its ``>`` header).
    Returns ``[(record_id, [positions...]), ...]`` in file order within the
    chunk.  ``chunk_end is None`` means "read to EOF".
    """
    with open(fasta_path, "rb") as f:
        f.seek(chunk_start)
        if chunk_end is None:
            text = f.read()
        else:
            # One bulk read for the chunk, then a few readline() calls to
            # complete the last record that extends past our boundary.
            # Collect parts in a list to avoid O(n²) bytes concatenation.
            parts = [f.read(chunk_end - chunk_start)]
            while True:
                line = f.readline()
                # Stop at EOF or at the next record's header line; that
                # header belongs to the following chunk.
                if not line or line.startswith(b">"):
                    break
                parts.append(line)
            text = b"".join(parts)

    # For chunks that don't start at byte 0, skip the partial-record fragment
    # at the front (bytes belonging to the previous chunk's last record).
    if chunk_start > 0:
        if not text.startswith(b">"):
            idx = text.find(b"\n>")
            # No header in this chunk at all: every byte belongs to a record
            # started in an earlier chunk, so there is nothing to report.
            if idx == -1:
                return []
            text = text[idx + 1:]  # keep the ">"

    results = []
    # Each record is "<header line>\n<sequence lines...>"; splitting on ">"
    # works because the leading fragment (if any) was stripped above.
    for record in text.split(b">"):
        if not record.strip():
            continue
        lines = record.split(b"\n")
        # The full header line is used as the record id.
        record_id = lines[0].strip().decode("ascii")
        # Concatenate sequence lines and drop embedded spaces.
        # NOTE(review): b"\r" is not stripped, so CRLF-terminated files would
        # leave \r bytes in the sequence -- assumes \n-only input; confirm.
        sequence = b"".join(lines[1:]).replace(b" ", b"")
        positions = _find_record_matches(pattern, sequence)
        if positions:
            results.append((record_id, positions))
    return results


def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    Returns ``[(record_id, [positions...]), ...]`` in file order.

    The file is partitioned into one byte-range chunk per CPU and the chunks
    are searched concurrently; ``_search_chunk`` extends each range to a
    record boundary so no record is split or double-counted.

    Defect fixed: the diff residue left an early
    ``return _baseline(fasta_path, pattern)`` before this implementation,
    making it unreachable; that stale delegation is removed here.
    """
    num_threads = os.cpu_count() or 4
    file_size = os.path.getsize(fasta_path)
    # max(1, ...) keeps chunk_size positive for files smaller than the
    # thread count; surplus chunks past EOF simply read nothing.
    chunk_size = max(1, file_size // num_threads)

    # The last chunk gets end=None ("read to EOF") so the remainder of the
    # integer division is not dropped.
    chunks = [
        (i * chunk_size, (i + 1) * chunk_size if i < num_threads - 1 else None)
        for i in range(num_threads)
    ]

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(_search_chunk, fasta_path, start, end, pattern)
            for start, end in chunks
        ]

    # Gathering in submission order preserves file order across chunks.
    results = []
    for future in futures:
        results.extend(future.result())
    return results
Loading