From 14461d0f35ad9061daaa806fff4948f3e3b9429c Mon Sep 17 00:00:00 2001
From: Richard Liang <rliang@cfenet.ubc.ca>
Date: Tue, 23 Dec 2025 13:59:13 -0800
Subject: [PATCH 1/3] Closes #18.

---
 src/hla_algorithm/hla_algorithm.py | 43 +++++++++----------
 src/hla_algorithm/models.py        | 43 ++++++++++++++++++-
 tests/hla_algorithm_test.py        | 16 +++++--
 tests/models_test.py               | 68 +++++++++++++++++++++++++++++-
 4 files changed, 139 insertions(+), 31 deletions(-)

diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py
index 369eadd..1679484 100644
--- a/src/hla_algorithm/hla_algorithm.py
+++ b/src/hla_algorithm/hla_algorithm.py
@@ -4,7 +4,7 @@
 from io import TextIOBase
 from operator import attrgetter
 from pathlib import Path
-from typing import Final, Optional, TypedDict, cast
+from typing import Final, Optional, TypedDict
 
 import numpy as np
 import yaml
@@ -18,12 +18,12 @@
     HLASequence,
     HLAStandard,
     HLAStandardMatch,
+    MatchingAllelePair,
 )
 from .utils import (
     BIN2NUC,
     HLA_LOCUS,
     StoredHLAStandards,
-    allele_coordinates_sort_key,
     count_strict_mismatches,
     nuc2bin,
     sort_allele_pairs,
@@ -138,7 +138,9 @@ def load_default_hla_standards() -> LoadedStandards:
         :return: List of known HLA standards
         :rtype: list[HLAStandard]
         """
-        with open(HLAAlgorithm.DEFAULT_CONFIG_DIR / "hla_standards.yaml") as standards_file:
+        with open(
+            HLAAlgorithm.DEFAULT_CONFIG_DIR / "hla_standards.yaml"
+        ) as standards_file:
             return HLAAlgorithm.read_hla_standards(standards_file)
 
     FREQUENCY_LOCUS_COLUMNS: dict[HLA_LOCUS, tuple[str, str]] = {
@@ -230,16 +232,12 @@ def combine_standards_stepper(
         matching_stds: Sequence[HLAStandardMatch],
         seq: Sequence[int],
         mismatch_threshold: int = 0,
-    ) -> Generator[tuple[tuple[int, ...], int, tuple[str, str]], None, None]:
+    ) -> Generator[MatchingAllelePair, None, None]:
         """
         Identifies "good" combined standards for the specified sequence.
 
         On each iteration, it continues checking combined standards until it
-        finds a "match", and yields a tuple containing the details of that
-        match:
-        - the combined standard, as a tuple of integers 0-15;
-        - the number of mismatches identified; and
-        - the allele pair (i.e. names of the two alleles in the combination).
+        finds a "match", and yields a MatchingAllelePair containing its details.
 
         A "match" is defined by the number of mismatches between the combined
         standard and the sequence:
@@ -263,15 +261,6 @@ def combine_standards_stepper(
                 # "Mush" the two standards together to produce something
                 # that looks like what you get when you sequence HLA.
                 std_bin = np.array(std_b.sequence) | np.array(std_a.sequence)
-                allele_pair: tuple[str, str] = cast(
-                    tuple[str, str],
-                    tuple(
-                        sorted(
-                            (std_a.allele, std_b.allele),
-                            key=allele_coordinates_sort_key,
-                        )
-                    ),
-                )
 
                 # There could be more than one combined standard with the
                 # same sequence, so check if this one's already been found.
@@ -291,7 +280,11 @@ def combine_standards_stepper(
                 elif mismatches < current_rejection_threshold:
                     current_rejection_threshold = max(mismatches, mismatch_threshold)
 
-                yield (combined_std_bin, mismatches, allele_pair)
+                yield MatchingAllelePair.create_from_unsorted_alleles(
+                    standard_bin=combined_std_bin,
+                    mismatch_count=mismatches,
+                    allele_names=(std_a.allele, std_b.allele),
+                )
 
     @staticmethod
     def combine_standards(
@@ -330,13 +323,15 @@ def combine_standards(
         combos: dict[tuple[int, ...], tuple[int, list[tuple[str, str]]]] = {}
 
         fewest_mismatches: int | float = float("inf")
-        for (
-            combined_std_bin,
-            mismatches,
-            allele_pair,
-        ) in HLAAlgorithm.combine_standards_stepper(
+        for matching_allele_pair in HLAAlgorithm.combine_standards_stepper(
             matching_stds, seq, mismatch_threshold
         ):
+            combined_std_bin: tuple[int, ...] = matching_allele_pair.standard_bin
+            mismatches: int = matching_allele_pair.mismatch_count
+            allele_pair: tuple[str, str] = (
+                matching_allele_pair.allele_1,
+                matching_allele_pair.allele_2,
+            )
             if combined_std_bin not in combos:
                 combos[combined_std_bin] = (mismatches, [])
             combos[combined_std_bin][1].append(allele_pair)
diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py
index 86ea8b6..19ae05c 100644
--- a/src/hla_algorithm/models.py
+++ b/src/hla_algorithm/models.py
@@ -2,10 +2,10 @@
 from collections.abc import Iterable
 from dataclasses import dataclass, field
 from operator import itemgetter
-from typing import Final, Optional
+from typing import Final, Optional, Self
 
 import numpy as np
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 
 from .utils import (
     HLA_LOCUS,
@@ -75,6 +75,45 @@ class HLAStandardMatch(HLAStandard):
     mismatch: int
 
 
+class MatchingAllelePair(BaseModel):
+    """
+    A pair of alleles whose combined standard matches an observed sequence.
+
+    Fields:
+    - standard_bin: the combined standard, as a tuple of integers 0-15;
+    - mismatch_count: the number of mismatches identified; and
+    - allele_1, allele_2: names of the two alleles, in coordinate order.
+    """
+    standard_bin: tuple[int, ...]
+    mismatch_count: int
+    allele_1: str
+    allele_2: str
+
+    @model_validator(mode="after")
+    def check_alleles_ordered(self) -> Self:
+        """Reject any pair whose allele_1 sorts after its allele_2."""
+        first_key = allele_coordinates_sort_key(self.allele_1)
+        if first_key > allele_coordinates_sort_key(self.allele_2):
+            raise ValueError("allele_1 should be less than or equal to allele_2")
+        return self
+
+    @classmethod
+    def create_from_unsorted_alleles(
+        cls,
+        standard_bin: tuple[int, ...],
+        mismatch_count: int,
+        allele_names: tuple[str, str],
+    ) -> Self:
+        """Build a pair from two allele names given in either order."""
+        ordered = sorted(allele_names, key=allele_coordinates_sort_key)
+        return cls(
+            standard_bin=standard_bin,
+            mismatch_count=mismatch_count,
+            allele_1=ordered[0],
+            allele_2=ordered[1],
+        )
+
+
 class HLACombinedStandard(BaseModel):
     """
     Represents a combined HLA standard and all of its possible combinations.
diff --git a/tests/hla_algorithm_test.py b/tests/hla_algorithm_test.py
index 52249f9..d523087 100644
--- a/tests/hla_algorithm_test.py
+++ b/tests/hla_algorithm_test.py
@@ -21,6 +21,7 @@
     HLASequence,
     HLAStandard,
     HLAStandardMatch,
+    MatchingAllelePair,
 )
 from hla_algorithm.utils import GroupedAllele, HLARawStandard, StoredHLAStandards
 
@@ -125,7 +126,7 @@ def hla_algorithm():
 
 
 @pytest.mark.parametrize(
-    "sequence, matching_standards, thresholds, exp_result",
+    "sequence, matching_standards, thresholds, raw_exp_result",
     [
         pytest.param(
             (1, 2, 4, 8),
@@ -591,16 +592,25 @@ def test_combine_standards_stepper(
     sequence: Sequence[int],
     matching_standards: list[HLAStandardMatch],
     thresholds: list[int],
-    exp_result: list[tuple[tuple[int, ...], int, tuple[str, str]]],
+    raw_exp_result: list[tuple[tuple[int, ...], int, tuple[str, str]]],
 ):
     for threshold in thresholds:
-        result: list[tuple[tuple[int, ...], int, tuple[str, str]]] = list(
+        result: list[MatchingAllelePair] = list(
             HLAAlgorithm.combine_standards_stepper(
                 matching_stds=matching_standards,
                 seq=sequence,
                 mismatch_threshold=threshold,
             )
         )
+        exp_result: list[MatchingAllelePair] = [
+            MatchingAllelePair(
+                standard_bin=raw_map[0],
+                mismatch_count=raw_map[1],
+                allele_1=raw_map[2][0],
+                allele_2=raw_map[2][1],
+            )
+            for raw_map in raw_exp_result
+        ]
         assert result == exp_result
 
 
diff --git a/tests/models_test.py b/tests/models_test.py
index cb21e59..43dcbf4 100644
--- a/tests/models_test.py
+++ b/tests/models_test.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+from pydantic import ValidationError
 
 from hla_algorithm.models import (
     AllelePairs,
@@ -13,6 +14,7 @@
     HLAProteinPair,
     HLASequence,
     HLAStandard,
+    MatchingAllelePair,
 )
 
 
@@ -113,6 +115,61 @@ def test_sequence_np(
         assert np.array_equal(hla_standard.sequence_np, expected_array)
 
 
+class TestMatchingAllelePair:
+    def test_validation_passes_alleles_strictly_different(self):
+        MatchingAllelePair(
+            standard_bin=(1,) * 546,
+            mismatch_count=2,
+            allele_1="B*57:01:01",
+            allele_2="B*57:01:04",
+        )
+
+    def test_validation_passes_alleles_equal(self):
+        MatchingAllelePair(
+            standard_bin=(1,) * 546,
+            mismatch_count=2,
+            allele_1="B*57:01:01",
+            allele_2="B*57:01:01",
+        )
+
+    def test_validation_failure(self):
+        with pytest.raises(ValidationError) as excinfo:
+            MatchingAllelePair(
+                standard_bin=(1,) * 546,
+                mismatch_count=2,
+                allele_1="B*57:01:11",
+                allele_2="B*57:01:01",
+            )
+        assert "allele_1 should be less than or equal to allele_2" in str(excinfo.value)
+
+    def test_create_from_unsorted_alleles_trivial_case(self):
+        pair: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles(
+            standard_bin=(1,) * 546,
+            mismatch_count=3,
+            allele_names=("B*15:02:04", "B*17:01:33N"),
+        )
+        assert pair.allele_1 == "B*15:02:04"
+        assert pair.allele_2 == "B*17:01:33N"
+
+    def test_create_from_unsorted_alleles_trivial_case_equal_allele_names(self):
+        pair: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles(
+            standard_bin=(1,) * 546,
+            mismatch_count=3,
+            allele_names=("B*15:02:04", "B*15:02:04"),
+        )
+        assert pair.allele_1 == "B*15:02:04"
+        assert pair.allele_2 == "B*15:02:04"
+
+    def test_create_from_unsorted_alleles_allele_order_corrected(self):
+        pair: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles(
+            standard_bin=(1,) * 546,
+            mismatch_count=3,
+            allele_names=("B*17:01:33N", "B*15:02:04"),
+        )
+        assert pair.allele_1 == "B*15:02:04"
+        assert pair.allele_2 == "B*17:01:33N"
+
+
 class TestHLACombinedStandard:
     @pytest.mark.parametrize(
         "allele_pairs, exp_allele_pair_str",
@@ -1189,7 +1246,10 @@ def test_identify_clean_prefix_in_pairs(
             AllelePairs._identify_clean_prefix_in_pairs(unambiguous_pairs)
         )
         if expected_second_prefix is not None:
-            assert {intermediate_result.common_prefix, intermediate_result.second_prefix} == {
+            assert {
+                intermediate_result.common_prefix,
+                intermediate_result.second_prefix,
+            } == {
                 expected_common_prefix,
                 expected_second_prefix,
             }
@@ -1272,7 +1332,11 @@ def test_identify_clean_prefix_in_pairs(
                 id="best_match_length_1_different_lengths_one_with_no_excess",
             ),
             pytest.param(
-                [("C*01", "07", "88"), ("C*01", "07", "01"), ("C*01", "07", "01", "110N")],
+                [
+                    ("C*01", "07", "88"),
+                    ("C*01", "07", "01"),
+                    ("C*01", "07", "01", "110N"),
+                ],
                 ("C*01", "07"),
                 id="typical_case",
             ),

From b2ff8ee78c04eaa4fb6635701488adc42bfc1e39 Mon Sep 17 00:00:00 2001
From: Richard Liang <rliang@cfenet.ubc.ca>
Date: Wed, 24 Dec 2025 12:19:30 -0800
Subject: [PATCH 2/3] Closes #17.

---
 src/hla_algorithm/hla_algorithm.py |  61 +++++--
 tests/hla_algorithm_test.py        | 273 +++++++++++++++++++++++++++++
 2 files changed, 315 insertions(+), 19 deletions(-)

diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py
index 1679484..3612b16 100644
--- a/src/hla_algorithm/hla_algorithm.py
+++ b/src/hla_algorithm/hla_algorithm.py
@@ -236,9 +236,21 @@ def combine_standards_stepper(
         """
         Identifies "good" combined standards for the specified sequence.
 
+        Humans have two copies of their HLA genes, so when we use Sanger
+        sequencing to sequence a person's HLA, we get a single sequence with
+        potentially many mixtures.  That is, at any position that the two genes
+        don't match, we see a nucleotide mixture consisting of the two
+        corresponding bases.
+
+        In order to find matches, we take allele sequences (reduced to ones that
+        are already "decent" matches for our sequence, to reduce running time)
+        and "mush" them together to produce potential matches for our sequence.
+
         On each iteration, it continues checking combined standards until it
         finds a "match", and yields a MatchingAllelePair containing its details.
 
+        PRECONDITION: matching_stds should contain no duplicates.
+
         A "match" is defined by the number of mismatches between the combined
         standard and the sequence:
         - this is the best-matching combined standard found so far (may
@@ -287,25 +299,12 @@ def combine_standards_stepper(
                 )
 
     @staticmethod
-    def combine_standards(
-        matching_stds: Sequence[HLAStandardMatch],
-        seq: Sequence[int],
+    def collate_matching_allele_pairs(
+        matching_allele_pairs: Iterable[MatchingAllelePair],
         mismatch_threshold: Optional[int] = None,
     ) -> dict[HLACombinedStandard, int]:
         """
-        Find the combinations of standards that match the given sequence.
-
-        Humans have two copies of their HLA genes, so when we use Sanger
-        sequencing to sequence a person's HLA, we get a single sequence with
-        potentially many mixtures.  That is, at any position that the two genes
-        don't match, we see a nucleotide mixture consisting of the two
-        corresponding bases.
-
-        In order to find matches, we take allele sequences (reduced to ones that
-        are already "decent" matches for our sequence, to reduce running time)
-        and "mush" them together to produce potential matches for our sequence.
-
-        PRECONDITION: matching_stds should contain no duplicates.
+        Collate the given MatchingAllelePairs into HLACombinedStandards.
 
         Returns a dictionary mapping HLACombinedStandards to their mismatch
         counts.  If mismatch_threshold is None or 0, then the result contains
@@ -323,9 +322,7 @@ def combine_standards(
         combos: dict[tuple[int, ...], tuple[int, list[tuple[str, str]]]] = {}
 
         fewest_mismatches: int | float = float("inf")
-        for matching_allele_pair in HLAAlgorithm.combine_standards_stepper(
-            matching_stds, seq, mismatch_threshold
-        ):
+        for matching_allele_pair in matching_allele_pairs:
             combined_std_bin: tuple[int, ...] = matching_allele_pair.standard_bin
             mismatches: int = matching_allele_pair.mismatch_count
             allele_pair: tuple[str, str] = (
@@ -357,6 +354,32 @@ def combine_standards(
 
         return result
 
+    @staticmethod
+    def combine_standards(
+        matching_stds: Sequence[HLAStandardMatch],
+        seq: Sequence[int],
+        mismatch_threshold: Optional[int] = None,
+    ) -> dict[HLACombinedStandard, int]:
+        """
+        Identify the combined standards that match the given sequence.
+
+        Candidate pairs are produced by combine_standards_stepper and then
+        compiled by collate_matching_allele_pairs into a dictionary mapping
+        HLACombinedStandards to their mismatch counts.
+
+        The parameters are as for combine_standards_stepper.  The collation
+        step receives mismatch_threshold exactly as given, so it affects the
+        results there as well.
+        """
+        # The stepper needs a concrete integer threshold, so None becomes 0.
+        stepper_threshold: int = 0
+        if mismatch_threshold is not None:
+            stepper_threshold = mismatch_threshold
+        pairs = HLAAlgorithm.combine_standards_stepper(
+            matching_stds, seq, stepper_threshold
+        )
+        return HLAAlgorithm.collate_matching_allele_pairs(pairs, mismatch_threshold)
+
     @staticmethod
     def get_mismatches(
         standard_bin: Sequence[int],
diff --git a/tests/hla_algorithm_test.py b/tests/hla_algorithm_test.py
index d523087..373e681 100644
--- a/tests/hla_algorithm_test.py
+++ b/tests/hla_algorithm_test.py
@@ -614,6 +614,279 @@ def test_combine_standards_stepper(
         assert result == exp_result
 
 
+@pytest.mark.parametrize(
+    "raw_matching_allele_pairs, thresholds, exp_result",
+    [
+        pytest.param(
+            [],
+            [None, 0, 1, 5],
+            {},
+            id="trivial_case",
+        ),
+        pytest.param(
+            [((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G"))],
+            [0, 1, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),),
+                ): 0,
+            },
+            id="one_combo_all_matches",
+        ),
+        pytest.param(
+            [((1, 2, 4, 4), 1, ("B*57:01:02", "B*57:01:02"))],
+            [None, 0, 1, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 4),
+                    possible_allele_pairs=(("B*57:01:02", "B*57:01:02"),),
+                ): 1,
+            },
+            id="one_combo_retained_regardless_of_threshold",
+        ),
+        pytest.param(
+            [((1, 4, 2, 8), 2, ("A*55:01", "A*55:01"))],
+            [None, 0, 1, 2, 3, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 4, 2, 8),
+                    possible_allele_pairs=(("A*55:01", "A*55:01"),),
+                ): 2
+            },
+            id="only_combo_retained_regardless_of_threshold_two_mismatches",
+        ),
+        pytest.param(
+            [((8, 4, 2, 1), 4, ("A*11:01:01:01", "A*11:01:01:01"))],
+            [None, 0, 1, 3, 4, 5, 10],
+            {
+                HLACombinedStandard(
+                    standard_bin=(8, 4, 2, 1),
+                    possible_allele_pairs=(("A*11:01:01:01", "A*11:01:01:01"),),
+                ): 4,
+            },
+            id="only_combo_retained_regardless_of_threshold_more_mismatches",
+        ),
+        pytest.param(
+            [((1, 4, 2, 8), 2, ("A*55:01", "A*55:01"))],
+            [None, 0, 1, 2, 3, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 4, 2, 8),
+                    possible_allele_pairs=(("A*55:01", "A*55:01"),),
+                ): 2
+            },
+            id="only_combo_retained_regardless_of_threshold_two_mismatches",
+        ),
+        pytest.param(
+            [
+                ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")),
+                ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")),
+                ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")),
+            ],
+            [None, 0],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),),
+                ): 0
+            },
+            id="exact_match_retained_rest_rejected_when_threshold_is_zero_last_in_inputs",
+        ),
+        pytest.param(
+            [
+                ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")),
+                ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")),
+                ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")),
+            ],
+            [None, 0],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),),
+                ): 0
+            },
+            id="exact_match_retained_rest_rejected_when_threshold_is_zero_first_in_inputs",
+        ),
+        pytest.param(
+            [
+                ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")),
+                ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")),
+                ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")),
+            ],
+            [None, 0],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),),
+                ): 0
+            },
+            id="exact_match_retained_rest_rejected_when_threshold_is_zero_middle_of_inputs",
+        ),
+        pytest.param(
+            [
+                ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")),
+                ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")),
+                ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")),
+            ],
+            [1, 2, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),),
+                ): 0,
+                HLACombinedStandard(
+                    standard_bin=(1, 6, 4, 8),
+                    possible_allele_pairs=(("A*07:08:09G", "A*30:08:01"),),
+                ): 1,
+                HLACombinedStandard(
+                    standard_bin=(1, 4, 4, 8),
+                    possible_allele_pairs=(("A*30:08:01", "A*30:08:01"),),
+                ): 1,
+            },
+            id="several_combos_all_below_threshold",
+        ),
+        pytest.param(
+            [
+                ((1, 2, 4, 4), 3, ("B*57:01:02", "B*57:01:02")),
+                ((9, 6, 4, 12), 1, ("B*57:01:02", "B*58:22:33G")),
+            ],
+            [None, 0, 1, 2],
+            {
+                HLACombinedStandard(
+                    standard_bin=(9, 6, 4, 12),
+                    possible_allele_pairs=(("B*57:01:02", "B*58:22:33G"),),
+                ): 1,
+            },
+            id="combo_above_threshold_is_rejected",
+        ),
+        pytest.param(
+            [
+                ((1, 2, 4, 4), 3, ("B*57:01:02", "B*57:01:02")),
+                ((9, 6, 4, 12), 1, ("B*57:01:02", "B*58:22:33G")),
+                ((8, 4, 4, 8), 3, ("B*58:22:33G", "B*58:22:33G")),
+            ],
+            [3, 4, 5],
+            {
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 4),
+                    possible_allele_pairs=(("B*57:01:02", "B*57:01:02"),),
+                ): 3,
+                HLACombinedStandard(
+                    standard_bin=(9, 6, 4, 12),
+                    possible_allele_pairs=(("B*57:01:02", "B*58:22:33G"),),
+                ): 1,
+                HLACombinedStandard(
+                    standard_bin=(8, 4, 4, 8),
+                    possible_allele_pairs=(("B*58:22:33G", "B*58:22:33G"),),
+                ): 3,
+            },
+            id="all_combos_below_threshold",
+        ),
+        pytest.param(
+            [
+                ((2, 2, 4, 12), 2, ("B*01:02:03", "B*01:02:04")),
+                ((2, 2, 4, 12), 2, ("B*11:22:33", "B*11:22:44")),
+            ],
+            [None, 0, 1, 2, 3, 10],
+            {
+                HLACombinedStandard(
+                    standard_bin=(2, 2, 4, 12),
+                    possible_allele_pairs=(
+                        ("B*01:02:03", "B*01:02:04"),
+                        ("B*11:22:33", "B*11:22:44"),
+                    ),
+                ): 2
+            },
+            id="two_pairs_collapse_into_one_combined_standard",
+        ),
+        pytest.param(
+            [
+                ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:22:02")),
+                ((2, 2, 4, 12), 2, ("B*57:01:02", "B*220:100:01")),
+                ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:15:01")),
+                ((2, 2, 4, 12), 2, ("B*57:03:01", "B*220:100:01")),
+            ],
+            [None, 0, 1, 2, 3, 10],
+            {
+                HLACombinedStandard(
+                    standard_bin=(2, 2, 4, 12),
+                    possible_allele_pairs=(
+                        ("B*57:01:02", "B*220:100:01"),
+                        ("B*57:03:01", "B*220:100:01"),
+                        ("B*110:01:01:01N", "B*220:15:01"),
+                        ("B*110:01:01:01N", "B*220:22:02"),
+                    ),
+                ): 2,
+            },
+            id="collapsed_pairs_sorted_by_coordinate",
+        ),
+        pytest.param(
+            [
+                ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:22:02")),
+                ((1, 2, 8, 4), 3, ("B*15:01", "B*22:33")),
+                ((2, 2, 4, 12), 2, ("B*57:01:02", "B*220:100:01")),
+                ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:15:01")),
+                ((3, 2, 4, 6), 1, ("B*01:02:03", "B*100:100")),
+                ((2, 2, 4, 12), 2, ("B*57:03:01", "B*220:100:01")),
+                ((1, 2, 4, 6), 0, ("B*88:01", "B*99:01")),
+                ((1, 2, 4, 6), 0, ("B*88:11", "B*99:11")),
+                ((4, 4, 1, 1), 4, ("B*67:67", "B*69:420")),
+            ],
+            [2],
+            {
+                HLACombinedStandard(
+                    standard_bin=(2, 2, 4, 12),
+                    possible_allele_pairs=(
+                        ("B*57:01:02", "B*220:100:01"),
+                        ("B*57:03:01", "B*220:100:01"),
+                        ("B*110:01:01:01N", "B*220:15:01"),
+                        ("B*110:01:01:01N", "B*220:22:02"),
+                    ),
+                ): 2,
+                HLACombinedStandard(
+                    standard_bin=(1, 2, 4, 6),
+                    possible_allele_pairs=(
+                        ("B*88:01", "B*99:01"),
+                        ("B*88:11", "B*99:11"),
+                    ),
+                ): 0,
+                HLACombinedStandard(
+                    standard_bin=(3, 2, 4, 6),
+                    possible_allele_pairs=(("B*01:02:03", "B*100:100"),),
+                ): 1,
+            },
+            id="typical_case",
+        ),
+    ],
+)
+def test_collate_matching_allele_pairs(
+    raw_matching_allele_pairs: list[tuple[tuple[int, ...], int, tuple[str, str]]],
+    thresholds: Iterable[int | None],
+    exp_result: dict[HLACombinedStandard, int],
+):
+    """Check that every listed threshold yields the same collated result."""
+    # The inputs do not depend on the threshold, so build them only once.
+    matching_allele_pairs: list[MatchingAllelePair] = [
+        MatchingAllelePair(
+            standard_bin=std_bin,
+            mismatch_count=mismatches,
+            allele_1=first,
+            allele_2=second,
+        )
+        for std_bin, mismatches, (first, second) in raw_matching_allele_pairs
+    ]
+    # None is a legitimate threshold here, meaning "no threshold specified".
+    for threshold in thresholds:
+        result = HLAAlgorithm.collate_matching_allele_pairs(
+            matching_allele_pairs, threshold
+        )
+        assert result == exp_result
+
+
+# Note: some of this testing can likely be eliminated.  These tests were written
+# before some of the logic was moved into collate_matching_allele_pairs, and
+# so some of this testing may be redundant now.
 @pytest.mark.parametrize(
     "sequence, matching_standards, thresholds, exp_result",
     [

From d2f39b928c9ac1e8d6e55ef01455081690a75ef5 Mon Sep 17 00:00:00 2001
From: Richard Liang <rliang@cfenet.ubc.ca>
Date: Thu, 8 Jan 2026 15:28:35 -0800
Subject: [PATCH 3/3] Formally restrict the package to require Python 3.11,
 3.12, or 3.13.

We've encountered some issues with Python 3.14 on ARM; see issue #20.
---
 .github/workflows/test.yml | 2 +-
 pyproject.toml             | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 51bbadd..4cb7a56 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -22,7 +22,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.10", "3.11", "3.12", "3.x"]
+        python-version: ["3.11", "3.12", "3.13"]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/pyproject.toml b/pyproject.toml
index 39d2a12..5c4fe3c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
 name = "hla_algorithm"
 description = 'Python implementation of the BC-CfE HLA interpretation algorithm'
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11,<3.14"  # "<=3.13" would reject 3.13.1 and later
 license = "MIT"
 keywords = []
 authors = [
@@ -17,7 +17,6 @@ authors = [
 classifiers = [
   "Development Status :: 5 - Production/Stable",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",