From 4b02341f9547bb0f5ca32fe840d304bf299dbd10 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 11:36:08 -0500 Subject: [PATCH 1/7] Add R-FFC to the README --- README.md | Bin 3490 -> 3623 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/README.md b/README.md index 44e07236fc85d4cf8bbed9929d294bbe2ed451d4..133070f1163c1db4bc7414f07b735f3d692925e1 100644 GIT binary patch delta 658 zcmY*W&ubGw6rN2s$!3yK+6EL2GO=i(bQ4L7AQhyVM6gy%tQM5EyUE66X}h!T>B#j@4au{d_4Q9{CYmkC>t%@HQ4CI zO@Qx}(-5H3)4VN`z?0}aKd_Zyl+=`Q-Nc9*8E4c>;}ra}22*>+Cg4O0QKUApsrs3^ z1711Z0_>!h&M_)|uV;6LHlEePG530L5@Pw+^tz^%%vB(GCP-3y=3)`+9LMRna=_GI za|ObU%zePOnF`>S%mHmOyLf@oSoN0havA!Y&lgUD1jj<{pFhEmsEI&piOFTXu=jz8`bf^LV?t)L3a& zcRi+_SXD^Bx8?wUSi;iMvE#LT{rF9u)R?PXYT~E;enL{YZ+PB=$wH;f!D7 zwmTF_43lx?jA|XS#?*)6@+&Rp1yu0#O8E9~B`n0!_^(imH-Ib~Ce` zO`fXQ6_VH!7V%p!cvlx73#Nu7-Uf4nIR_ICch5 zT?x1m8XH`{hf25-F5+c4I}7pwHBOkV8Tz68XV#X|HkjRa2CTm6B7TO8aUdjBs~eWZ aOcyRXeY&RViU|L%U^7w}{J4)~IQRwcnT@Le From 3db15da01015f3bdc92c83047ea9059a644ef0e8 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 12:00:28 -0500 Subject: [PATCH 2/7] Show Description of What Was Done --- rounds/1_histogram/solution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index dffbee5..9812199 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -8,7 +8,7 @@ def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - # TODO: remove this delegation and write your own implementation here. + # TODO: Add comment to push to branch from .baseline import compute_histogram as _baseline return _baseline(path) From 18ba26ada0b24f09147dd4986a2f0083c3dd2573 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 12:28:01 -0500 Subject: [PATCH 3/7] Show Description of What Was DoneUsed chatgpt AI for optimization --- rounds/1_histogram/solution.py | 40 ++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index 9812199..ed772cc 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -6,9 +6,41 @@ """ -def compute_histogram(path: str) -> dict[bytes, int]: +# def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - # TODO: Add comment to push to branch - from .baseline import compute_histogram as _baseline + # TODO: Used chatgpt for optimization of byte-pair histogram + # from .baseline import compute_histogram as _baseline - return _baseline(path) + #return _baseline(path) + +from array import array + + +def compute_histogram(path: str) -> list[int]: + """ + Frequency table for every 2-byte bigram. + + Result index: + index = (byte1 << 8) | byte2 + + Example: + b"AB" -> (65 << 8) | 66 + """ + with open(path, "rb") as f: + data = f.read() + + n = len(data) + if n < 2: + return [0] * 65536 + + # Fixed-size contiguous integer array + counts = array('I', [0]) * 65536 + + prev = data[0] + + for i in range(1, n): + curr = data[i] + counts[(prev << 8) | curr] += 1 + prev = curr + + return counts From a1659c64f51aab575339e1830c46f81a440a56b0 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 12:33:30 -0500 Subject: [PATCH 4/7] Used chatgpt AI for optimization 1 --- rounds/1_histogram/solution.py | 35 ++++++---------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index ed772cc..cadd1e3 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -13,34 +13,11 @@ #return _baseline(path) -from array import array +def histogram_dict(counts: list[int]) -> dict[bytes, int]: + out = {} + for i, count in enumerate(counts): + if count: + out[i.to_bytes(2, "big")] = count -def compute_histogram(path: str) -> list[int]: - """ - Frequency table for every 2-byte bigram. - - Result index: - index = (byte1 << 8) | byte2 - - Example: - b"AB" -> (65 << 8) | 66 - """ - with open(path, "rb") as f: - data = f.read() - - n = len(data) - if n < 2: - return [0] * 65536 - - # Fixed-size contiguous integer array - counts = array('I', [0]) * 65536 - - prev = data[0] - - for i in range(1, n): - curr = data[i] - counts[(prev << 8) | curr] += 1 - prev = curr - - return counts + return out From d03579da71f04e51a403fea2eddec0fe82a3cd29 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 13:00:07 -0500 Subject: [PATCH 5/7] Used chatgpt AI for optimization 2 --- rounds/1_histogram/solution.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index cadd1e3..58ecc5a 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -5,19 +5,17 @@ own faster implementation. """ +def compute_histogram(path: str) -> dict[bytes, int]: + with open(path, "rb") as f: + data = f.read() -# def compute_histogram(path: str) -> dict[bytes, int]: - """Frequency of every 2-byte bigram in the file at ``path``.""" - # TODO: Used chatgpt for optimization of byte-pair histogram - # from .baseline import compute_histogram as _baseline + counts = {} - #return _baseline(path) + for a, b in zip(data, data[1:]): + k = (a << 8) | b + counts[k] = counts.get(k, 0) + 1 -def histogram_dict(counts: list[int]) -> dict[bytes, int]: - out = {} - - for i, count in enumerate(counts): - if count: - out[i.to_bytes(2, "big")] = count - - return out + return { + k.to_bytes(2, "big"): v + for k, v in counts.items() + } \ No newline at end of file From 1355f5454f733d3887bca581adbc78657ea1b667 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 13:05:02 -0500 Subject: [PATCH 6/7] Used chatgpt AI for optimization 3 --- rounds/1_histogram/solution.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index 58ecc5a..d1c878d 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -5,17 +5,29 @@ own faster implementation. """ +from array import array + + def compute_histogram(path: str) -> dict[bytes, int]: with open(path, "rb") as f: data = f.read() - counts = {} + n = len(data) + if n < 2: + return {} + + # 65,536 possible 2-byte combinations + counts = array("I", [0]) * 65536 + + prev = data[0] - for a, b in zip(data, data[1:]): - k = (a << 8) | b - counts[k] = counts.get(k, 0) + 1 + for i in range(1, n): + curr = data[i] + counts[(prev << 8) | curr] += 1 + prev = curr return { - k.to_bytes(2, "big"): v - for k, v in counts.items() + i.to_bytes(2, "big"): count + for i, count in enumerate(counts) + if count } \ No newline at end of file From 777a37cd1e5a0ba853eac58671d4694481ecbd84 Mon Sep 17 00:00:00 2001 From: Raven Murphy Date: Wed, 13 May 2026 13:17:03 -0500 Subject: [PATCH 7/7] Used chatgpt AI for optimization 1 --- rounds/3_dna/solution.py | 56 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py index 8b917da..431d2ef 100644 --- a/rounds/3_dna/solution.py +++ b/rounds/3_dna/solution.py @@ -5,7 +5,23 @@ own faster implementation. """ -from .baseline import find_matches as _baseline +from __future__ import annotations + +def _find_positions(sequence: bytes, pattern: bytes) -> list[int]: + positions = [] + start = 0 + find = sequence.find + + while True: + pos = find(pattern, start) + + if pos == -1: + break + + positions.append(pos) + start = pos + 1 + + return positions def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]: @@ -13,5 +29,39 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]] Returns ``[(record_id, [positions...]), ...]`` in file order. """ - # TODO: remove this delegation and write your own implementation here. - return _baseline(fasta_path, pattern) + matches: list[tuple[str, list[int]]] = [] + + with open(fasta_path, "rb") as f: + record_id = None + seq_parts = [] + + for line in f: + + if line.startswith(b">"): + + # process previous record + if record_id is not None: + sequence = b"".join(seq_parts) + + positions = _find_positions(sequence, pattern) + + if positions: + matches.append((record_id, positions)) + + # begin new FASTA record + record_id = line[1:].strip().decode("ascii") + seq_parts = [] + + else: + seq_parts.append(line.strip()) + + # process final record + if record_id is not None: + sequence = b"".join(seq_parts) + + positions = _find_positions(sequence, pattern) + + if positions: + matches.append((record_id, positions)) + + return matches