Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions spirepy/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

@memory.cache
def cluster_metadata():
"""Fetches and caches the SPIRE cluster metadata from the remote server."""
"""Fetches and caches the SPIRE cluster metadata from the remote server.

The first call is slow because it downloads a large file; subsequent
calls reuse the cached version.

:return: A DataFrame with the SPIRE cluster metadata.
"""
return pl.read_csv(
"https://swifter.embl.de/~fullam/spire/metadata/spire_v1_cluster_metadata.tsv.gz",
separator="\t",
Expand All @@ -17,7 +23,13 @@ def cluster_metadata():

@memory.cache
def genome_metadata():
"""Fetches and caches the SPIRE genome metadata from the remote server."""
"""Fetches and caches the SPIRE genome metadata from the remote server.

The first call is slow because it downloads a large file; subsequent
calls reuse the cached version.

:return: A DataFrame with the SPIRE genome metadata.
"""
return pl.read_csv(
"https://swifter.embl.de/~fullam/spire/metadata/spire_v1_genome_metadata.tsv.gz",
separator="\t",
Expand Down
2 changes: 1 addition & 1 deletion spirepy/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
handler = RichHandler(level=logging.WARNING, markup=True)
handler.setFormatter(formatter)

logger = logging.getLogger("micro_fr_pred")
logger = logging.getLogger("SPIREpy")
logger.addHandler(handler)
10 changes: 7 additions & 3 deletions spirepy/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os.path as path
import urllib.request

import pandas as pd
import polars as pl

from spirepy.data import cluster_metadata
Expand Down Expand Up @@ -63,9 +62,11 @@ def get_mags(self) -> pl.dataframe.DataFrame:
if self._mags is None:
cluster_meta = cluster_metadata()
metadata = self.get_metadata()
clusters = metadata.filter(metadata["spire_cluster"] != "null")
clusters = metadata.filter(
pl.col("spire_cluster").is_not_null()
)
mags = cluster_meta.filter(
cluster_meta["spire_cluster"].is_in(clusters["spire_cluster"])
pl.col("spire_cluster").is_in(clusters["spire_cluster"])
)
mags = mags.join(clusters, on="spire_cluster")
mags = mags.select(
Expand All @@ -83,6 +84,9 @@ def get_eggnog_data(self) -> pl.dataframe.DataFrame:
:rtype: :class:`polars.dataframe.DataFrame`
"""
if self._eggnog_data is None:
# We need to use pandas because polars does not support reading
# files with a footer
import pandas as pd
egg = pd.read_csv(
f"https://spire.embl.de/download_eggnog/{self.id}",
sep="\t",
Expand Down
6 changes: 3 additions & 3 deletions spirepy/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ def get_mags(self) -> pl.dataframe.DataFrame:
if self._mags is None:
genomes = genome_metadata()
self._mags = genomes.filter(
genomes["derived_from_sample"].is_in(
self.get_metadata()["sample_id"].to_list()
pl.col("derived_from_sample").is_in(
self.get_metadata()["sample_id"].to_list()
)
)
)
return self._mags

def download_mags(self, output: str):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_get_mags(
mock_cluster_metadata.assert_called_once()
assert_frame_equal(result2, expected_mags)

@patch("spirepy.sample.pd.read_csv")
@patch("pandas.read_csv")
def test_get_eggnog_data(self, mock_read_csv: MagicMock):
"""Tests get_eggnog_data for data retrieval and caching."""
mock_df = pd.DataFrame({"gene": ["gene1"], "annotation": ["annot1"]})
Expand Down
Loading