From 14461d0f35ad9061daaa806fff4948f3e3b9429c Mon Sep 17 00:00:00 2001 From: Richard Liang Date: Tue, 23 Dec 2025 13:59:13 -0800 Subject: [PATCH 1/3] Closes #18. --- src/hla_algorithm/hla_algorithm.py | 43 +++++++++---------- src/hla_algorithm/models.py | 43 ++++++++++++++++++- tests/hla_algorithm_test.py | 16 +++++-- tests/models_test.py | 68 +++++++++++++++++++++++++++++- 4 files changed, 139 insertions(+), 31 deletions(-) diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py index 369eadd..1679484 100644 --- a/src/hla_algorithm/hla_algorithm.py +++ b/src/hla_algorithm/hla_algorithm.py @@ -4,7 +4,7 @@ from io import TextIOBase from operator import attrgetter from pathlib import Path -from typing import Final, Optional, TypedDict, cast +from typing import Final, Optional, TypedDict import numpy as np import yaml @@ -18,12 +18,12 @@ HLASequence, HLAStandard, HLAStandardMatch, + MatchingAllelePair, ) from .utils import ( BIN2NUC, HLA_LOCUS, StoredHLAStandards, - allele_coordinates_sort_key, count_strict_mismatches, nuc2bin, sort_allele_pairs, @@ -138,7 +138,9 @@ def load_default_hla_standards() -> LoadedStandards: :return: List of known HLA standards :rtype: list[HLAStandard] """ - with open(HLAAlgorithm.DEFAULT_CONFIG_DIR / "hla_standards.yaml") as standards_file: + with open( + HLAAlgorithm.DEFAULT_CONFIG_DIR / "hla_standards.yaml" + ) as standards_file: return HLAAlgorithm.read_hla_standards(standards_file) FREQUENCY_LOCUS_COLUMNS: dict[HLA_LOCUS, tuple[str, str]] = { @@ -230,16 +232,12 @@ def combine_standards_stepper( matching_stds: Sequence[HLAStandardMatch], seq: Sequence[int], mismatch_threshold: int = 0, - ) -> Generator[tuple[tuple[int, ...], int, tuple[str, str]], None, None]: + ) -> Generator[MatchingAllelePair, None, None]: """ Identifies "good" combined standards for the specified sequence. 
On each iteration, it continues checking combined standards until it - finds a "match", and yields a tuple containing the details of that - match: - - the combined standard, as a tuple of integers 0-15; - - the number of mismatches identified; and - - the allele pair (i.e. names of the two alleles in the combination). + finds a "match", and yields a MatchingAllelePair containing its details. A "match" is defined by the number of mismatches between the combined standard and the sequence: @@ -263,15 +261,6 @@ def combine_standards_stepper( # "Mush" the two standards together to produce something # that looks like what you get when you sequence HLA. std_bin = np.array(std_b.sequence) | np.array(std_a.sequence) - allele_pair: tuple[str, str] = cast( - tuple[str, str], - tuple( - sorted( - (std_a.allele, std_b.allele), - key=allele_coordinates_sort_key, - ) - ), - ) # There could be more than one combined standard with the # same sequence, so check if this one's already been found. @@ -291,7 +280,11 @@ def combine_standards_stepper( elif mismatches < current_rejection_threshold: current_rejection_threshold = max(mismatches, mismatch_threshold) - yield (combined_std_bin, mismatches, allele_pair) + yield MatchingAllelePair.create_from_unsorted_alleles( + standard_bin=combined_std_bin, + mismatch_count=mismatches, + allele_names=(std_a.allele, std_b.allele), + ) @staticmethod def combine_standards( @@ -330,13 +323,15 @@ def combine_standards( combos: dict[tuple[int, ...], tuple[int, list[tuple[str, str]]]] = {} fewest_mismatches: int | float = float("inf") - for ( - combined_std_bin, - mismatches, - allele_pair, - ) in HLAAlgorithm.combine_standards_stepper( + for matching_allele_pair in HLAAlgorithm.combine_standards_stepper( matching_stds, seq, mismatch_threshold ): + combined_std_bin: tuple[int, ...] 
= matching_allele_pair.standard_bin + mismatches: int = matching_allele_pair.mismatch_count + allele_pair: tuple[str, str] = ( + matching_allele_pair.allele_1, + matching_allele_pair.allele_2, + ) if combined_std_bin not in combos: combos[combined_std_bin] = (mismatches, []) combos[combined_std_bin][1].append(allele_pair) diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py index 86ea8b6..19ae05c 100644 --- a/src/hla_algorithm/models.py +++ b/src/hla_algorithm/models.py @@ -2,10 +2,10 @@ from collections.abc import Iterable from dataclasses import dataclass, field from operator import itemgetter -from typing import Final, Optional +from typing import Final, Optional, Self import numpy as np -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, model_validator from .utils import ( HLA_LOCUS, @@ -75,6 +75,45 @@ class HLAStandardMatch(HLAStandard): mismatch: int +class MatchingAllelePair(BaseModel): + """ + Represents an allele pair that matches an observed sequence. + + This contains: + - the combined standard, as a tuple of integers 0-15; + - the number of mismatches identified; and + - the allele pair (i.e. names of the two alleles in the combination). + """ + standard_bin: tuple[int, ...] 
+ mismatch_count: int + allele_1: str + allele_2: str + + @model_validator(mode="after") + def check_alleles_ordered(self) -> Self: + if allele_coordinates_sort_key(self.allele_1) > allele_coordinates_sort_key(self.allele_2): + raise ValueError("allele_1 should be less than or equal to allele_2") + return self + + @classmethod + def create_from_unsorted_alleles( + cls, + standard_bin: tuple[int, ...], + mismatch_count: int, + allele_names: tuple[str, str], + ) -> Self: + sorted_allele_names: list[str] = sorted( + allele_names, + key=allele_coordinates_sort_key, + ) + return cls( + standard_bin=standard_bin, + mismatch_count=mismatch_count, + allele_1=sorted_allele_names[0], + allele_2=sorted_allele_names[1], + ) + + class HLACombinedStandard(BaseModel): """ Represents a combined HLA standard and all of its possible combinations. diff --git a/tests/hla_algorithm_test.py b/tests/hla_algorithm_test.py index 52249f9..d523087 100644 --- a/tests/hla_algorithm_test.py +++ b/tests/hla_algorithm_test.py @@ -21,6 +21,7 @@ HLASequence, HLAStandard, HLAStandardMatch, + MatchingAllelePair, ) from hla_algorithm.utils import GroupedAllele, HLARawStandard, StoredHLAStandards @@ -125,7 +126,7 @@ def hla_algorithm(): @pytest.mark.parametrize( - "sequence, matching_standards, thresholds, exp_result", + "sequence, matching_standards, thresholds, raw_exp_result", [ pytest.param( (1, 2, 4, 8), @@ -591,16 +592,25 @@ def test_combine_standards_stepper( sequence: Sequence[int], matching_standards: list[HLAStandardMatch], thresholds: list[int], - exp_result: list[tuple[tuple[int, ...], int, tuple[str, str]]], + raw_exp_result: list[tuple[tuple[int, ...], int, tuple[str, str]]], ): for threshold in thresholds: - result: list[tuple[tuple[int, ...], int, tuple[str, str]]] = list( + result: list[MatchingAllelePair] = list( HLAAlgorithm.combine_standards_stepper( matching_stds=matching_standards, seq=sequence, mismatch_threshold=threshold, ) ) + exp_result: list[MatchingAllelePair] = [ + 
MatchingAllelePair( + standard_bin=raw_map[0], + mismatch_count=raw_map[1], + allele_1=raw_map[2][0], + allele_2=raw_map[2][1], + ) + for raw_map in raw_exp_result + ] assert result == exp_result diff --git a/tests/models_test.py b/tests/models_test.py index cb21e59..43dcbf4 100644 --- a/tests/models_test.py +++ b/tests/models_test.py @@ -2,6 +2,7 @@ import numpy as np import pytest +from pydantic import ValidationError from hla_algorithm.models import ( AllelePairs, @@ -13,6 +14,7 @@ HLAProteinPair, HLASequence, HLAStandard, + MatchingAllelePair, ) @@ -113,6 +115,61 @@ def test_sequence_np( assert np.array_equal(hla_standard.sequence_np, expected_array) +class TestMatchingAllelePair: + def test_validation_passes_alleles_strictly_different(self): + MatchingAllelePair( + standard_bin=(1,) * 546, + mismatch_count=2, + allele_1="B*57:01:01", + allele_2="B*57:01:04", + ) + + def test_validation_passes_alleles_equal(self): + MatchingAllelePair( + standard_bin=(1,) * 546, + mismatch_count=2, + allele_1="B*57:01:01", + allele_2="B*57:01:01", + ) + + def test_validation_failure(self): + with pytest.raises(ValidationError) as excinfo: + MatchingAllelePair( + standard_bin=(1,) * 546, + mismatch_count=2, + allele_1="B*57:01:11", + allele_2="B*57:01:01", + ) + assert "allele_1 should be less than or equal to allele_2" in str(excinfo.value) + + def test_create_from_unsorted_alleles_trivial_case(self): + map: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles( + standard_bin=(1,) * 546, + mismatch_count=3, + allele_names=("B*15:02:04", "B*17:01:33N"), + ) + assert "B*15:02:04" == map.allele_1 + assert "B*17:01:33N" == map.allele_2 + + def test_create_from_unsorted_alleles_trivial_case_equal_allele_names(self): + map: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles( + standard_bin=(1,) * 546, + mismatch_count=3, + allele_names=("B*15:02:04", "B*15:02:04"), + ) + assert "B*15:02:04" == map.allele_1 + assert "B*15:02:04" == map.allele_2 + + 
def test_create_from_unsorted_alleles_allele_order_corrected(self): + map: MatchingAllelePair = MatchingAllelePair.create_from_unsorted_alleles( + standard_bin=(1,) * 546, + mismatch_count=3, + allele_names=("B*17:01:33N", "B*15:02:04"), + ) + assert "B*15:02:04" == map.allele_1 + assert "B*17:01:33N" == map.allele_2 + + class TestHLACombinedStandard: @pytest.mark.parametrize( "allele_pairs, exp_allele_pair_str", @@ -1189,7 +1246,10 @@ def test_identify_clean_prefix_in_pairs( AllelePairs._identify_clean_prefix_in_pairs(unambiguous_pairs) ) if expected_second_prefix is not None: - assert {intermediate_result.common_prefix, intermediate_result.second_prefix} == { + assert { + intermediate_result.common_prefix, + intermediate_result.second_prefix, + } == { expected_common_prefix, expected_second_prefix, } @@ -1272,7 +1332,11 @@ def test_identify_clean_prefix_in_pairs( id="best_match_length_1_different_lengths_one_with_no_excess", ), pytest.param( - [("C*01", "07", "88"), ("C*01", "07", "01"), ("C*01", "07", "01", "110N")], + [ + ("C*01", "07", "88"), + ("C*01", "07", "01"), + ("C*01", "07", "01", "110N"), + ], ("C*01", "07"), id="typical_case", ), From b2ff8ee78c04eaa4fb6635701488adc42bfc1e39 Mon Sep 17 00:00:00 2001 From: Richard Liang Date: Wed, 24 Dec 2025 12:19:30 -0800 Subject: [PATCH 2/3] Closes #17. --- src/hla_algorithm/hla_algorithm.py | 61 +++++-- tests/hla_algorithm_test.py | 273 +++++++++++++++++++++++++++++ 2 files changed, 315 insertions(+), 19 deletions(-) diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py index 1679484..3612b16 100644 --- a/src/hla_algorithm/hla_algorithm.py +++ b/src/hla_algorithm/hla_algorithm.py @@ -236,9 +236,21 @@ def combine_standards_stepper( """ Identifies "good" combined standards for the specified sequence. + Humans have two copies of their HLA genes, so when we use Sanger + sequencing to sequence a person's HLA, we get a single sequence with + potentially many mixtures. 
That is, at any position that the two genes + don't match, we see a nucleotide mixture consisting of the two + corresponding bases. + + In order to find matches, we take allele sequences (reduced to ones that + are already "decent" matches for our sequence, to reduce running time) + and "mush" them together to produce potential matches for our sequence. + On each iteration, it continues checking combined standards until it finds a "match", and yields a MatchingAllelePair containing its details. + PRECONDITION: matching_stds should contain no duplicates. + A "match" is defined by the number of mismatches between the combined standard and the sequence: - this is the best-matching combined standard found so far (may @@ -287,25 +299,12 @@ def combine_standards_stepper( ) @staticmethod - def combine_standards( - matching_stds: Sequence[HLAStandardMatch], - seq: Sequence[int], + def collate_matching_allele_pairs( + matching_allele_pairs: Iterable[MatchingAllelePair], mismatch_threshold: Optional[int] = None, ) -> dict[HLACombinedStandard, int]: """ - Find the combinations of standards that match the given sequence. - - Humans have two copies of their HLA genes, so when we use Sanger - sequencing to sequence a person's HLA, we get a single sequence with - potentially many mixtures. That is, at any position that the two genes - don't match, we see a nucleotide mixture consisting of the two - corresponding bases. - - In order to find matches, we take allele sequences (reduced to ones that - are already "decent" matches for our sequence, to reduce running time) - and "mush" them together to produce potential matches for our sequence. - - PRECONDITION: matching_stds should contain no duplicates. + Collate the given MatchingAllelePairs into HLACombinedStandards. Returns a dictionary mapping HLACombinedStandards to their mismatch counts. 
If mismatch_threshold is None or 0, then the result contains @@ -323,9 +322,7 @@ def combine_standards( combos: dict[tuple[int, ...], tuple[int, list[tuple[str, str]]]] = {} fewest_mismatches: int | float = float("inf") - for matching_allele_pair in HLAAlgorithm.combine_standards_stepper( - matching_stds, seq, mismatch_threshold - ): + for matching_allele_pair in matching_allele_pairs: combined_std_bin: tuple[int, ...] = matching_allele_pair.standard_bin mismatches: int = matching_allele_pair.mismatch_count allele_pair: tuple[str, str] = ( @@ -357,6 +354,32 @@ def combine_standards( return result + @staticmethod + def combine_standards( + matching_stds: Sequence[HLAStandardMatch], + seq: Sequence[int], + mismatch_threshold: Optional[int] = None, + ) -> dict[HLACombinedStandard, int]: + """ + Find the combinations of standards that match the given sequence. + + This uses combine_standards_stepper to find any putative matches, and + then uses collate_matching_allele_pairs to compile the information into + a dictionary mapping HLACombinedStandards to their mismatch counts. + + The parameters are as for combine_standards_stepper; mismatch_threshold + is also fed directly into collate_matching_allele_pairs and affects the + results accordingly. 
+ """ + return HLAAlgorithm.collate_matching_allele_pairs( + HLAAlgorithm.combine_standards_stepper( + matching_stds, + seq, + mismatch_threshold if mismatch_threshold is not None else 0, + ), + mismatch_threshold, + ) + @staticmethod def get_mismatches( standard_bin: Sequence[int], diff --git a/tests/hla_algorithm_test.py b/tests/hla_algorithm_test.py index d523087..373e681 100644 --- a/tests/hla_algorithm_test.py +++ b/tests/hla_algorithm_test.py @@ -614,6 +614,279 @@ def test_combine_standards_stepper( assert result == exp_result +@pytest.mark.parametrize( + "raw_matching_allele_pairs, thresholds, exp_result", + [ + pytest.param( + [], + [None, 0, 1, 5], + {}, + id="trivial_case", + ), + pytest.param( + [((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G"))], + [0, 1, 5], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),), + ): 0, + }, + id="one_combo_all_matches", + ), + pytest.param( + [((1, 2, 4, 4), 1, ("B*57:01:02", "B*57:01:02"))], + [None, 0, 1, 5], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 4), + possible_allele_pairs=(("B*57:01:02", "B*57:01:02"),), + ): 1, + }, + id="one_combo_retained_regardless_of_threshold", + ), + pytest.param( + [((1, 4, 2, 8), 2, ("A*55:01", "A*55:01"))], + [None, 0, 1, 2, 3, 5], + { + HLACombinedStandard( + standard_bin=(1, 4, 2, 8), + possible_allele_pairs=(("A*55:01", "A*55:01"),), + ): 2 + }, + id="only_combo_retained_regardless_of_threshold_two_mismatches", + ), + pytest.param( + [((8, 4, 2, 1), 4, ("A*11:01:01:01", "A*11:01:01:01"))], + [None, 0, 1, 3, 4, 5, 10], + { + HLACombinedStandard( + standard_bin=(8, 4, 2, 1), + possible_allele_pairs=(("A*11:01:01:01", "A*11:01:01:01"),), + ): 4, + }, + id="only_combo_retained_regardless_of_threshold_more_mismatches", + ), + pytest.param( + [((1, 4, 2, 8), 2, ("A*55:01", "A*55:01"))], + [None, 0, 1, 2, 3, 5], + { + HLACombinedStandard( + standard_bin=(1, 4, 2, 8), + possible_allele_pairs=(("A*55:01", "A*55:01"),), + 
): 2 + }, + id="only_combo_retained_regardless_of_threshold_two_mismatches", + ), + pytest.param( + [ + ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")), + ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")), + ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")), + ], + [None, 0], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),), + ): 0 + }, + id="exact_match_retained_rest_rejected_when_threshold_is_zero_last_in_inputs", + ), + pytest.param( + [ + ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")), + ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")), + ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")), + ], + [None, 0], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),), + ): 0 + }, + id="exact_match_retained_rest_rejected_when_threshold_is_zero_first_in_inputs", + ), + pytest.param( + [ + ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")), + ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")), + ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")), + ], + [None, 0], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),), + ): 0 + }, + id="exact_match_retained_rest_rejected_when_threshold_is_zero_middle_of_inputs", + ), + pytest.param( + [ + ((1, 2, 4, 8), 0, ("A*07:08:09G", "A*07:08:09G")), + ((1, 6, 4, 8), 1, ("A*07:08:09G", "A*30:08:01")), + ((1, 4, 4, 8), 1, ("A*30:08:01", "A*30:08:01")), + ], + [1, 2, 5], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*07:08:09G"),), + ): 0, + HLACombinedStandard( + standard_bin=(1, 6, 4, 8), + possible_allele_pairs=(("A*07:08:09G", "A*30:08:01"),), + ): 1, + HLACombinedStandard( + standard_bin=(1, 4, 4, 8), + possible_allele_pairs=(("A*30:08:01", "A*30:08:01"),), + ): 1, + }, + id="several_combos_all_below_threshold", + ), + pytest.param( + [ + ((1, 2, 4, 4), 3, ("B*57:01:02", "B*57:01:02")), + ((9, 6, 4, 12), 
1, ("B*57:01:02", "B*58:22:33G")), + ], + [None, 0, 1, 2], + { + HLACombinedStandard( + standard_bin=(9, 6, 4, 12), + possible_allele_pairs=(("B*57:01:02", "B*58:22:33G"),), + ): 1, + }, + id="combo_above_threshold_is_rejected", + ), + pytest.param( + [ + ((1, 2, 4, 4), 3, ("B*57:01:02", "B*57:01:02")), + ((9, 6, 4, 12), 1, ("B*57:01:02", "B*58:22:33G")), + ((8, 4, 4, 8), 3, ("B*58:22:33G", "B*58:22:33G")), + ], + [3, 4, 5], + { + HLACombinedStandard( + standard_bin=(1, 2, 4, 4), + possible_allele_pairs=(("B*57:01:02", "B*57:01:02"),), + ): 3, + HLACombinedStandard( + standard_bin=(9, 6, 4, 12), + possible_allele_pairs=(("B*57:01:02", "B*58:22:33G"),), + ): 1, + HLACombinedStandard( + standard_bin=(8, 4, 4, 8), + possible_allele_pairs=(("B*58:22:33G", "B*58:22:33G"),), + ): 3, + }, + id="all_combos_below_threshold", + ), + pytest.param( + [ + ((2, 2, 4, 12), 2, ("B*01:02:03", "B*01:02:04")), + ((2, 2, 4, 12), 2, ("B*11:22:33", "B*11:22:44")), + ], + [None, 0, 1, 2, 3, 10], + { + HLACombinedStandard( + standard_bin=(2, 2, 4, 12), + possible_allele_pairs=( + ("B*01:02:03", "B*01:02:04"), + ("B*11:22:33", "B*11:22:44"), + ), + ): 2 + }, + id="two_pairs_collapse_into_one_combined_standard", + ), + pytest.param( + [ + ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:22:02")), + ((2, 2, 4, 12), 2, ("B*57:01:02", "B*220:100:01")), + ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:15:01")), + ((2, 2, 4, 12), 2, ("B*57:03:01", "B*220:100:01")), + ], + [None, 0, 1, 2, 3, 10], + { + HLACombinedStandard( + standard_bin=(2, 2, 4, 12), + possible_allele_pairs=( + ("B*57:01:02", "B*220:100:01"), + ("B*57:03:01", "B*220:100:01"), + ("B*110:01:01:01N", "B*220:15:01"), + ("B*110:01:01:01N", "B*220:22:02"), + ), + ): 2, + }, + id="collapsed_pairs_sorted_by_coordinate", + ), + pytest.param( + [ + ((2, 2, 4, 12), 2, ("B*110:01:01:01N", "B*220:22:02")), + ((1, 2, 8, 4), 3, ("B*15:01", "B*22:33")), + ((2, 2, 4, 12), 2, ("B*57:01:02", "B*220:100:01")), + ((2, 2, 4, 12), 2, ("B*110:01:01:01N", 
"B*220:15:01")), + ((3, 2, 4, 6), 1, ("B*01:02:03", "B*100:100")), + ((2, 2, 4, 12), 2, ("B*57:03:01", "B*220:100:01")), + ((1, 2, 4, 6), 0, ("B*88:01", "B*99:01")), + ((1, 2, 4, 6), 0, ("B*88:11", "B*99:11")), + ((4, 4, 1, 1), 4, ("B*67:67", "B*69:420")), + ], + [2], + { + HLACombinedStandard( + standard_bin=(2, 2, 4, 12), + possible_allele_pairs=( + ("B*57:01:02", "B*220:100:01"), + ("B*57:03:01", "B*220:100:01"), + ("B*110:01:01:01N", "B*220:15:01"), + ("B*110:01:01:01N", "B*220:22:02"), + ), + ): 2, + HLACombinedStandard( + standard_bin=(1, 2, 4, 6), + possible_allele_pairs=( + ("B*88:01", "B*99:01"), + ("B*88:11", "B*99:11"), + ), + ): 0, + HLACombinedStandard( + standard_bin=(3, 2, 4, 6), + possible_allele_pairs=(("B*01:02:03", "B*100:100"),), + ): 1, + }, + id="typical_case", + ), + ], +) +def test_collate_matching_allele_pairs( + raw_matching_allele_pairs: list[tuple[tuple[int, ...], int, tuple[str, str]]], + thresholds: Iterable[int], + exp_result: dict[HLACombinedStandard, int], +): + for threshold in thresholds: + matching_allele_pairs: list[MatchingAllelePair] = [ + MatchingAllelePair( + standard_bin=raw_map[0], + mismatch_count=raw_map[1], + allele_1=raw_map[2][0], + allele_2=raw_map[2][1], + ) + for raw_map in raw_matching_allele_pairs + ] + result: dict[HLACombinedStandard, int] = ( + HLAAlgorithm.collate_matching_allele_pairs( + matching_allele_pairs, + threshold, + ) + ) + assert result == exp_result + + +# Note: some of this testing can likely be eliminated. These tests were written +# before some of the logic was moved into collate_matching_allele_pairs, and +# so some of this testing may be redundant now. @pytest.mark.parametrize( "sequence, matching_standards, thresholds, exp_result", [ From d2f39b928c9ac1e8d6e55ef01455081690a75ef5 Mon Sep 17 00:00:00 2001 From: Richard Liang Date: Thu, 8 Jan 2026 15:28:35 -0800 Subject: [PATCH 3/3] Formally restrict the package to require Python 3.11, 3.12, or 3.13. 
We've encountered some issues with Python 3.14 on ARM; see issue #20. --- .github/workflows/test.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 51bbadd..4cb7a56 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.10", "3.11", "3.12", "3.x"] + python-version: ["3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 39d2a12..5c4fe3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "hla_algorithm" description = 'Python implementation of the BC-CfE HLA interpretation algorithm' readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11,<3.14" license = "MIT" keywords = [] authors = [ @@ -17,7 +17,6 @@ authors = [ classifiers = [ "Development Status :: 5 - Production/Stable", "Programming Language :: Python", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13",