Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ and, when paired with epitope prediction, estimating a population's
ability to mount an immune response to specific epitopes.

Automated download of allele frequency data from
[allelefrequencies.net](http://www.allelefrequencies.net/).
[allelefrequencies.net](https://www.allelefrequencies.net/).

Full documentation at [HLAfreq/docs](https://BarinthusBio.github.io/HLAfreq/HLAfreq.html). Source code is available at [BarinthusBio/HLAfreq](https://github.com/BarinthusBio/HLAfreq).

Expand Down
2 changes: 1 addition & 1 deletion examples/paper/reproduce_paper_results2.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
iedb_refb = [i for i in iedb_ref if "B" in i]

# Download countries in regions as defined on
# http://www.allelefrequencies.net/datasets.asp#tag_4
# https://www.allelefrequencies.net/datasets.asp#tag_4
r = requests.get("https://raw.githubusercontent.com/BarinthusBio/HLAfreq/main/data/example/countries.csv")
with open("data/example/countries.csv", "w") as f:
f.write(r.text)
Expand Down
2 changes: 1 addition & 1 deletion examples/paper/reproduce_paper_results3.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
pass

# Download countries in regions as defined on
# http://www.allelefrequencies.net/datasets.asp#tag_4
# https://www.allelefrequencies.net/datasets.asp#tag_4
r = requests.get("https://raw.githubusercontent.com/BarinthusBio/HLAfreq/main/data/example/countries.csv")
with open("data/example/countries.csv", "w") as f:
f.write(r.text)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="HLAfreq",
version="0.0.5",
version="0.0.6",
url="https://github.com/BarinthusBio/HLAfreq",
project_urls={
'Documentaion': "https://barinthusbio.github.io/HLAfreq/HLAfreq.html",
Expand Down
22 changes: 12 additions & 10 deletions src/HLAfreq/HLAfreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def makeURL(
):
"""Create URL for search of allele frequency net database.

All arguments are documented [here](http://www.allelefrequencies.net/extaccess.asp)
All arguments are documented [here](https://www.allelefrequencies.net/extaccess.asp)

Args:
country (str, optional): Country name to retrieve records from. Defaults to "".
Expand All @@ -78,10 +78,10 @@ def makeURL(
created using `resolution` and `resolution_pattern`. Defaults to 2.
region (str, optional): Filter to geographic region. {Asia, Australia,
Eastern Europe, ...}.
All regions listed [here](http://www.allelefrequencies.net/pop6003a.asp).
All regions listed [here](https://www.allelefrequencies.net/pop6003a.asp).
Defaults to "".
ethnic (str, optional): Filter to ethnicity. {"Amerindian", "Black", "Caucasian", ...}.
All ethnicities listed [here](http://www.allelefrequencies.net/pop6003a.asp).
All ethnicities listed [here](https://www.allelefrequencies.net/pop6003a.asp).
Defaults to "".
study_type (str, optional): Type of study. {"Anthropology", "Blood+Donor",
"Bone+Marrow+Registry", "Controls+for+Disease+Study", "Disease+Study+Patients",
Expand All @@ -104,7 +104,7 @@ def makeURL(
Returns:
str: URL to search allelefrequencies.net
"""
base = "http://www.allelefrequencies.net/hla6006a.asp?"
base = "https://www.allelefrequencies.net/hla6006a.asp?"
locus_type = "hla_locus_type=Classical&"
hla_locus = "hla_locus=%s&" % (locus)
country = "hla_country=%s&" % (country)
Expand Down Expand Up @@ -220,15 +220,15 @@ def formatAF(AFtab, ignoreG=True):
AFtab (pd.DataFrame): Allele frequency data downloaded from allelefrequencies.net
using `getAFdata()`.
ignoreG (bool, optional): Treat G group alleles as normal.
See http://hla.alleles.org/alleles/g_groups.html for details. Defaults to True.
See https://hla.alleles.org/alleles/g_groups.html for details. Defaults to True.

Returns:
pd.DataFrame: The formatted allele frequency data.
"""
df = AFtab.copy()
if df.sample_size.dtype == "O":
if pd.api.types.is_string_dtype(df.sample_size.dtype):
df.sample_size = pd.to_numeric(df.sample_size.str.replace(",", ""))
if df.allele_freq.dtype == "O":
if pd.api.types.is_string_dtype(df.allele_freq.dtype):
if ignoreG:
df.allele_freq = df.allele_freq.str.replace("(*)", "", regex=False)
df.allele_freq = pd.to_numeric(df.allele_freq)
Expand All @@ -245,7 +245,7 @@ def getAFdata(base_url, timeout=20, format=True, ignoreG=True):
timeout (int): How long to wait to receive a response.
format (bool): Format the downloaded data using `formatAF()`.
ignoreG (bool): treat allele G groups as normal.
See http://hla.alleles.org/alleles/g_groups.html for details. Default = True
See https://hla.alleles.org/alleles/g_groups.html for details. Default = True

Returns:
pd.DataFrame: allele frequency data parsed into a pandas dataframe
Expand Down Expand Up @@ -393,7 +393,8 @@ def collapse_reduced_alleles(AFtab, datasetID="population"):
row.loci.unique()[0],
len(row.loci.unique()),
len(row.sample_size.unique()),
]
],
include_groups=False
)
collapsed = pd.DataFrame(
collapsed.tolist(),
Expand Down Expand Up @@ -555,7 +556,8 @@ def combineAF(
np.average(row.allele_freq, weights=row[weights]),
row.c.sum(),
row.sample_size.sum(),
]
],
include_groups=False
)
combined = pd.DataFrame(
combined.tolist(), columns=["allele", "loci", "wav", "c", "sample_size"]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_HLAfreq_pymc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_correct_c_array(aftab):
aftab['c'] = 2 * aftab.allele_freq * aftab.sample_size
c_pivot = aftab.pivot(columns="allele", index="population", values="c")
c_array = HLAhdi._make_c_array(aftab)
pytest.approx(c_array[0]) == c_pivot
assert pytest.approx(c_array[0]) == c_pivot


def test_correct_c_array_alleles(aftab):
Expand All @@ -43,7 +43,7 @@ def test_correct_c_array_alleles(aftab):
aftab['c'] = 2 * aftab.allele_freq * aftab.sample_size
c_pivot = aftab.pivot(columns="allele", index="population", values="c")
c_array = HLAhdi._make_c_array(aftab)
c_array[1] == c_pivot.columns
assert all(c_array[1] == c_pivot.columns)


def test_complete(aftab):
Expand Down
Loading