From cd617039d59febc80a8ed206962ca0afebebb9e9 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Sun, 24 May 2026 14:07:52 -0600 Subject: [PATCH 1/6] feat: pin fibertools-rs to 0.9 this required bumping samtools, htslib and bedtools b/c of transitive dep conflicts. (liblzma, xz, and libzlib) --- workflow/envs/env.yaml | 8 ++++---- workflow/rules/fire-peaks.smk | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/workflow/envs/env.yaml b/workflow/envs/env.yaml index 07984df4c..27e2d5ab2 100644 --- a/workflow/envs/env.yaml +++ b/workflow/envs/env.yaml @@ -4,10 +4,10 @@ channels: - bioconda - defaults dependencies: - - samtools==1.19.1 - - htslib==1.19.1 - - bedtools==2.31 - - bioconda::fibertools-rs==0.6 + - samtools>=1.19.1 + - htslib>=1.19.1 + - bedtools>=2.31 + - bioconda::fibertools-rs==0.9 - hck>=0.9.2 - bioawk - ripgrep diff --git a/workflow/rules/fire-peaks.smk b/workflow/rules/fire-peaks.smk index f328534ab..b3a464841 100644 --- a/workflow/rules/fire-peaks.smk +++ b/workflow/rules/fire-peaks.smk @@ -34,7 +34,7 @@ rule shuffled_pileup_chromosome: DEFAULT_ENV shell: """ - {FT_EXE} pileup {input.cram} {wildcards.chrom} -t {threads} \ + {FT_EXE} pileup {input.cram} -r {wildcards.chrom} -t {threads} \ --fiber-coverage --shuffle {input.shuffled} \ --no-msp --no-nuc \ | bgzip -@ {threads} \ @@ -103,7 +103,7 @@ rule pileup_chromosome: """ {FT_EXE} pileup -t {threads} \ --haps --fiber-coverage \ - {input.bam} {wildcards.chrom} \ + {input.bam} -r {wildcards.chrom} \ | bgzip -@ {threads} \ > {output.bed} """ From f2a877614637962fc84f326d7daea43ca62ebeb9 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Sun, 24 May 2026 14:08:41 -0600 Subject: [PATCH 2/6] chore: update pixi version; update pixi toml identifiers --- pixi.toml | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/pixi.toml b/pixi.toml index 431ab05e7..db2350554 100644 --- a/pixi.toml +++ b/pixi.toml @@ -1,41 +1,16 @@ -[project] +[workspace] authors = ["Mitchell Robert Vollger "] channels = ["conda-forge", "bioconda"] -description = "Add a short description here" +description = "A Snakemake pipeline for calling FIRE peaks using fibertools-rs." name = "FIRE" platforms = ["osx-64", "linux-64"] -version = "0.1.2" +version = "0.2.0" [tasks] fmt = "ruff format . && taplo format pixi.toml && snakefmt workflow/" -test-data = { cmd = [ - "cd", - "$INIT_CWD", - "&&", - "mkdir", - "-p", - "fire-test-data", - "&&", - "aws", - "s3", - "--no-sign-request", - "sync", - "--endpoint-url", - "https://s3.kopah.orci.washington.edu", - "s3://stergachis/public/FIRE/test-data", - "fire-test-data/", -] } -test = { cmd = [ - "cd", - "$INIT_CWD/fire-test-data", - "&&", - "snakemake", - "-s", - "$PIXI_PROJECT_ROOT/workflow/Snakefile", - "--configfile", - "test.yaml", - "-k", -], depends-on = [ +test-data = { cmd = '''bash -c 'if [ -f "$INIT_CWD/fire-test-data/test.cram" ]; then echo "test data already present, skipping download"; else mkdir -p "$INIT_CWD/fire-test-data" && aws s3 --no-sign-request sync --endpoint-url https://s3.kopah.orci.washington.edu s3://stergachis/public/FIRE/test-data "$INIT_CWD/fire-test-data/"; fi' ''' } +test-clean = { cmd = '''bash -c 'cd "$INIT_CWD/fire-test-data" && rm -rf results temp .snakemake' ''' } +test = { cmd = '''bash -c 'cd "$INIT_CWD/fire-test-data" && trap "rm -rf results temp .snakemake" EXIT && snakemake -s "$PIXI_PROJECT_ROOT/workflow/Snakefile" --configfile test.yaml -k' ''', depends-on = [ "test-data", ], clean-env = true } fire = { cmd = [ From c3ff3eb86a409a83d3ef4168c4aaf67a1d9fafeb Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Sun, 24 May 2026 14:12:16 -0600 Subject: [PATCH 3/6] chore: update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5769cd383..2b50f8c5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Change Log All notable changes to this project will be documented in this file. +## v0.2.0 +- Update fibertools-rs version (also samtools, htslib, and bedtools) in `workflow/envs/env.yaml` +- Fix Polars issue in #52 + ## v0.1.2 From 91ffd6573eba56feeffe84572baa535938a8f022 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Sun, 24 May 2026 14:58:56 -0600 Subject: [PATCH 4/6] ci: update pixi version --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8af20c95b..70966031e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,8 +12,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: prefix-dev/setup-pixi@v0.8.1 + - uses: prefix-dev/setup-pixi@v0.9.6 with: - pixi-version: v0.37.0 + pixi-version: v0.66.0 cache: true - run: pixi run test From 5f387a8d52b53a6685e77dd3ad15fd9508b467f9 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Mon, 25 May 2026 13:26:58 -0600 Subject: [PATCH 5/6] fix: force chrom as utf8 in pl.read_csv callsites (#56) --- workflow/scripts/fdr-table.py | 13 +++++++++---- workflow/scripts/merge_fire_peaks.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/workflow/scripts/fdr-table.py b/workflow/scripts/fdr-table.py index c78d9fb53..3c185ead3 100644 --- a/workflow/scripts/fdr-table.py +++ b/workflow/scripts/fdr-table.py @@ -57,11 +57,16 @@ def read_pileup_file(infile, nrows): return None # add scema overrides for the score columns + # Build schema overrides keyed by positional column names (column_1, column_2, ...) + # because polars infers schema BEFORE new_columns is applied when has_header=False. + # Keying on '#chrom' / 'score' here would be silently ignored. schema_overrides = {} - for n in ["score", "score_H1", "score_H2", "score_shuffled"]: - if n in header: - schema_overrides[n] = float - + for col_idx, col_name in enumerate(header, start=1): + positional = f"column_{col_idx}" + if col_name in ("score", "score_H1", "score_H2", "score_shuffled"): + schema_overrides[positional] = pl.Float64 + elif col_name == "#chrom": + schema_overrides[positional] = pl.Utf8 logging.info(f"Header of the pileup file:\n{header}") logging.info(f"Schema overrides for the pileup file:\n{schema_overrides}") diff --git a/workflow/scripts/merge_fire_peaks.py b/workflow/scripts/merge_fire_peaks.py index eaf0c33f5..8867cec37 100755 --- a/workflow/scripts/merge_fire_peaks.py +++ b/workflow/scripts/merge_fire_peaks.py @@ -124,7 +124,7 @@ def main( logger.setLevel(log_level) inf = io.StringIO(sys.stdin.read()) - df = pl.read_csv(inf, separator="\t", null_values=".") + df = pl.read_csv(inf, separator="\t", null_values=".", schema_overrides={"#chrom": pl.Utf8},) if df.shape[0] == 0: logging.info("No peaks to merge") return 0 From d53b14e81bee11210f55a2acc98563409e3a8a87 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Wed, 27 May 2026 11:58:41 -0600 Subject: [PATCH 6/6] fix: handle exclude better --- workflow/rules/coverages.smk | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/workflow/rules/coverages.smk b/workflow/rules/coverages.smk index c354c76cc..ad388b0e0 100644 --- a/workflow/rules/coverages.smk +++ b/workflow/rules/coverages.smk @@ -126,13 +126,14 @@ rule exclude_from_shuffle: conda: DEFAULT_ENV params: - exclude=EXCLUDES, + exclude=lambda wc: " ".join(EXCLUDES) if EXCLUDES else "", shell: """ - - ( \ - bedtools genomecov -bga -i {input.filtered} -g {input.fai} | awk '$4 == 0'; \ - less {params.exclude} \ + ( + bedtools genomecov -bga -i {input.filtered} -g {input.fai} | awk '$4 == 0' + if [ -n "{params.exclude}" ]; then + zcat -f {params.exclude} + fi ) \ | cut -f 1-3 \ | bedtools sort \