Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions rounds/1_histogram/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,20 @@
own faster implementation.
"""

import numpy as np


def compute_histogram(path: str) -> dict[bytes, int]:
    """Frequency of every 2-byte bigram in the file at ``path``.

    Bigrams overlap: every pair of adjacent bytes is counted, so a file of
    ``n`` bytes contributes ``n - 1`` bigrams. A file shorter than two bytes
    yields an empty dict.

    Args:
        path: Path of the file to read; it is opened in binary mode.

    Returns:
        A dict mapping each bigram that occurs at least once (a length-2
        ``bytes`` key) to its number of occurrences. Bigrams that never
        occur are omitted.
    """
    with open(path, "rb") as f:
        data = f.read()

    arr = np.frombuffer(data, dtype=np.uint8)
    # Encode each bigram as a uint16 index: high_byte * 256 + low_byte.
    # Widen to uint16 *before* multiplying so the product (up to 255 * 256)
    # fits in the element type.
    indices = arr[:-1].astype(np.uint16) * 256 + arr[1:]
    counts = np.bincount(indices, minlength=65536)

    # Only bigrams that actually occur go into the result. Converting with
    # ``tolist()`` yields plain Python ints in one pass, so the keys can be
    # built with ``int.to_bytes`` (big-endian == high byte first).
    present = np.flatnonzero(counts)
    return {
        idx.to_bytes(2, "big"): count
        for idx, count in zip(present.tolist(), counts[present].tolist())
    }
23 changes: 20 additions & 3 deletions rounds/2_corruption/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,27 @@
own faster implementation.
"""

from .baseline import find_corruptions as _baseline
import numpy as np


def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
    """Return ``[(offset, length), ...]`` for every differing byte range.

    The two files are compared byte by byte; each maximal run of consecutive
    differing positions is reported once.

    Args:
        ref_path: Path of the reference file.
        cor_path: Path of the possibly corrupted file.

    Returns:
        A list of ``(offset, length)`` tuples in ascending offset order, where
        ``offset`` is the index of the first differing byte of a run and
        ``length`` is the number of consecutive differing bytes. The list is
        empty when the files are identical.

    Raises:
        ValueError: If the two files do not have the same length.
    """
    ref = np.fromfile(ref_path, dtype=np.uint8)
    cor = np.fromfile(cor_path, dtype=np.uint8)

    if ref.size != cor.size:
        raise ValueError("reference and corrupted files differ in length")

    # Single vectorised comparison; the result holds the sorted positions of
    # all differing bytes.
    diff_indices = np.flatnonzero(ref != cor)
    if diff_indices.size == 0:
        return []

    # A new run starts wherever the gap between adjacent differing positions
    # exceeds 1. ``breaks[i]`` is the index (into diff_indices) of the last
    # element of a run that is followed by another run.
    breaks = np.flatnonzero(np.diff(diff_indices) > 1)
    starts = diff_indices[np.concatenate(([0], breaks + 1))]
    ends = diff_indices[np.concatenate((breaks, [diff_indices.size - 1]))]
    lengths = ends - starts + 1

    # ``tolist()`` converts to plain Python ints in bulk.
    return list(zip(starts.tolist(), lengths.tolist()))
Loading