From 18426787a99b19429e03b8de646cec02c956af5a Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 12:26:43 -0400
Subject: [PATCH 1/7] initial for making my pr happen, lolol

---
 myfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 myfile
diff --git a/myfile b/myfile
new file mode 100644
index 0000000..e69de29

From 5dc46554edcc5f74b7e9e0c02348aa58ab91c81d Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 13:03:44 -0400
Subject: [PATCH 2/7] iteration one

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/1_histogram/solution.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py
index dffbee5..24a8136 100644
--- a/rounds/1_histogram/solution.py
+++ b/rounds/1_histogram/solution.py
@@ -5,10 +5,30 @@
 own faster implementation.
 """
 
+from concurrent.futures import ProcessPoolExecutor
+import os
+
+CHUNK_SIZE = 8 * 1024 * 1024
 
 def compute_histogram(path: str) -> dict[bytes, int]:
     """Frequency of every 2-byte bigram in the file at ``path``."""
     # TODO: remove this delegation and write your own implementation here.
-    from .baseline import compute_histogram as _baseline
+    with open(path, "rb") as f:
+        data = f.read()
 
-    return _baseline(path)
+    counts: list[int] = [0] * 65536  # one slot per possible 2-byte value
+    if len(data) == 0:
+        return {}
+    data_iter = iter(data)
+    window_idx = next(data_iter)  # seed the window with the first byte
+    for b in data_iter:
+        window_idx <<= 8  # previous byte moves into the high 8 bits
+        window_idx &= 0xff00  # drop the byte that slid out of the window
+        window_idx |= b  # current byte becomes the low 8 bits
+        counts[window_idx] += 1
+    d = {}
+    for i,cnt in enumerate(counts):
+        if counts[i] != 0:  # only bigrams that occurred are reported
+            b = i.to_bytes(2, byteorder="big")  # index -> its two bytes
+            d[b] = cnt
+    return d

From 8af615dee4dd174b49ed969be587011cf6e18df8 Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 14:33:05 -0400
Subject: [PATCH 3/7] dna round

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/3_dna/solution.py | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 8b917da..deb9cbd 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -5,8 +5,7 @@
 own faster implementation.
 """
 
-from .baseline import find_matches as _baseline
-
+import re
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
     """Find every FASTA record whose sequence contains ``pattern``.
@@ -14,4 +13,34 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
     Returns ``[(record_id, [positions...]), ...]`` in file order.
     """
     # TODO: remove this delegation and write your own implementation here.
-    return _baseline(fasta_path, pattern)
+    # Step 1: read the whole FASTA file as text and decode the pattern so the
+    # search below can use a single ``str`` API.
+    pattern_str = pattern.decode("ascii")
+    with open(fasta_path, "r") as f:
+        text = f.read()
+
+    matches: list[tuple[str, list[int]]] = []
+    pattern_str = pattern.decode('ascii')
+    regex = re.compile(pattern_str)
+
+    # Step 2: split the file on '>' to peel off one record at a time. The
+    # first element is the chunk before any header (empty for well-formed
+    # files) and is skipped by the ``.strip()`` guard below.
+    for record in text.split(">"):
+        if not record.strip():
+            continue
+
+        # Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
+        # The id is the first line; the remaining lines are joined back into a
+        # single contiguous sequence string.
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+
+        positions: list[int] = []
+        print(sequence)
+        for m in regex.finditer(sequence):
+            positions.append(m.start())
+        if positions:
+            matches.append((record_id, positions))
+    return matches

From d2b310dd6caa72499dc7f2af27fb85cbc68b82ee Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 14:37:30 -0400
Subject: [PATCH 4/7] dna 02

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/3_dna/solution.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index deb9cbd..9d057ec 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -15,7 +15,6 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
     # TODO: remove this delegation and write your own implementation here.
     # Step 1: read the whole FASTA file as text and decode the pattern so the
     # search below can use a single ``str`` API.
-    pattern_str = pattern.decode("ascii")
     with open(fasta_path, "r") as f:
         text = f.read()
 
@@ -38,9 +37,7 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
         sequence = "".join(lines[1:]).replace(" ", "")
 
         positions: list[int] = []
-        print(sequence)
-        for m in regex.finditer(sequence):
-            positions.append(m.start())
+        positions = [m.start() for m in regex.finditer(sequence)]
         if positions:
             matches.append((record_id, positions))
     return matches

From be73d3432dee526740a577eadec8d101d8054510 Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 14:48:26 -0400
Subject: [PATCH 5/7] dna threads

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/3_dna/solution.py | 49 +++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 9d057ec..2ff5d5a 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -6,6 +6,24 @@
 """
 
 import re
+from concurrent.futures import ThreadPoolExecutor
+
+def find_match(args):
+        regex,record = args
+        # Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
+        # The id is the first line; the remaining lines are joined back into a
+        # single contiguous sequence string.
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+
+        positions: list[int] = []
+        positions = [m.start() for m in regex.finditer(sequence)]
+        if positions:
+            return (record_id, positions)
+        else:
+            return None
+
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
     """Find every FASTA record whose sequence contains ``pattern``.
@@ -22,22 +40,17 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
     pattern_str = pattern.decode('ascii')
     regex = re.compile(pattern_str)
 
-    # Step 2: split the file on '>' to peel off one record at a time. The
-    # first element is the chunk before any header (empty for well-formed
-    # files) and is skipped by the ``.strip()`` guard below.
-    for record in text.split(">"):
-        if not record.strip():
-            continue
-
-        # Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
-        # The id is the first line; the remaining lines are joined back into a
-        # single contiguous sequence string.
-        lines = record.split("\n")
-        record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
-
-        positions: list[int] = []
-        positions = [m.start() for m in regex.finditer(sequence)]
-        if positions:
-            matches.append((record_id, positions))
+    with ThreadPoolExecutor() as ex:
+        futures = []
+        for record in text.split(">"):
+            if not record.strip():
+                continue
+
+            t = ex.submit(find_match, args=(regex,record))
+            futures.append(t)
+    
+        for t in futures:
+            result = t.result()
+            if result:
+                matches.append(result)
     return matches

From a1d83132743dbf1b21d39bac0a364b3d0f7dff36 Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 14:55:12 -0400
Subject: [PATCH 6/7] Undid my regression that used regex, lol

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/3_dna/solution.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 2ff5d5a..8b5b934 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -9,7 +9,7 @@
 from concurrent.futures import ThreadPoolExecutor
 
 def find_match(args):
-        regex,record = args
+        pattern_str,record = args
         # Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
         # The id is the first line; the remaining lines are joined back into a
         # single contiguous sequence string.
@@ -18,7 +18,14 @@ def find_match(args):
         sequence = "".join(lines[1:]).replace(" ", "")
 
         positions: list[int] = []
-        positions = [m.start() for m in regex.finditer(sequence)]
+        start = 0
+        while True:
+            pos = sequence.find(pattern_str, start)
+            if pos == -1:
+                break
+            positions.append(pos)
+            start = pos + 1
+
         if positions:
             return (record_id, positions)
         else:
@@ -38,7 +45,6 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
 
     matches: list[tuple[str, list[int]]] = []
     pattern_str = pattern.decode('ascii')
-    regex = re.compile(pattern_str)
 
     with ThreadPoolExecutor() as ex:
         futures = []
@@ -46,7 +52,7 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
             if not record.strip():
                 continue
 
-            t = ex.submit(find_match, args=(regex,record))
+            t = ex.submit(find_match, args=(pattern_str,record))
             futures.append(t)
     
         for t in futures:

From 687935298ca1ef5e484e9a7f15a1fafb6a5a1a27 Mon Sep 17 00:00:00 2001
From: Drew Wock <dwock@esri.com>
Date: Wed, 13 May 2026 15:20:48 -0400
Subject: [PATCH 7/7] Bytes mode

Signed-off-by: Drew Wock <dwock@esri.com>
---
 rounds/3_dna/solution.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 8b5b934..8e61013 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -6,16 +6,20 @@
 """
 
 import re
+import string
 from concurrent.futures import ThreadPoolExecutor
 
+table = bytes.maketrans(b"", b"")
+
 def find_match(args):
         pattern_str,record = args
         # Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
         # The id is the first line; the remaining lines are joined back into a
         # single contiguous sequence string.
-        lines = record.split("\n")
+        lines = record.split(b'\n', 1)
         record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
+        sequence_raw = lines[1] if len(lines) > 1 else b""  # header-only record: no sequence
+        sequence = sequence_raw.translate(table, delete=string.whitespace.encode())
 
         positions: list[int] = []
         start = 0
@@ -27,7 +31,7 @@ def find_match(args):
             start = pos + 1
 
         if positions:
-            return (record_id, positions)
+            return (record_id.decode('ascii'), positions)
         else:
             return None
 
@@ -40,19 +44,18 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
     # TODO: remove this delegation and write your own implementation here.
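-    # Step 1: read the whole FASTA file as text and decode the pattern so the
-    # search below can use a single ``str`` API.
+    # Step 1: read the whole FASTA file as raw bytes so the ``bytes`` pattern
+    # can be searched directly, with no decode step.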
-    with open(fasta_path, "r") as f:
+    with open(fasta_path, "rb") as f:
         text = f.read()
 
     matches: list[tuple[str, list[int]]] = []
-    pattern_str = pattern.decode('ascii')
 
     with ThreadPoolExecutor() as ex:
         futures = []
-        for record in text.split(">"):
+        for record in text.split(b">"):
             if not record.strip():
                 continue
 
-            t = ex.submit(find_match, args=(pattern_str,record))
+            t = ex.submit(find_match, args=(pattern,record))
             futures.append(t)
     
         for t in futures: