From 15b4e1bf04eb03ffd5ead00561c0a1e93abe1fb1 Mon Sep 17 00:00:00 2001 From: Cade Mirchandani Date: Mon, 25 May 2026 12:50:44 -0600 Subject: [PATCH] fix: force chrom as utf8 in pl.read_csv callsites --- workflow/scripts/fdr-table.py | 13 +++++++++---- workflow/scripts/merge_fire_peaks.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/workflow/scripts/fdr-table.py b/workflow/scripts/fdr-table.py index c78d9fb53..3c185ead3 100644 --- a/workflow/scripts/fdr-table.py +++ b/workflow/scripts/fdr-table.py @@ -57,11 +57,16 @@ def read_pileup_file(infile, nrows): return None # add scema overrides for the score columns + # Build schema overrides keyed by positional column names (column_1, column_2, ...) + # because polars infers schema BEFORE new_columns is applied when has_header=False. + # Keying on '#chrom' / 'score' here would be silently ignored. schema_overrides = {} - for n in ["score", "score_H1", "score_H2", "score_shuffled"]: - if n in header: - schema_overrides[n] = float - + for col_idx, col_name in enumerate(header, start=1): + positional = f"column_{col_idx}" + if col_name in ("score", "score_H1", "score_H2", "score_shuffled"): + schema_overrides[positional] = pl.Float64 + elif col_name == "#chrom": + schema_overrides[positional] = pl.Utf8 logging.info(f"Header of the pileup file:\n{header}") logging.info(f"Schema overrides for the pileup file:\n{schema_overrides}") diff --git a/workflow/scripts/merge_fire_peaks.py b/workflow/scripts/merge_fire_peaks.py index eaf0c33f5..8867cec37 100755 --- a/workflow/scripts/merge_fire_peaks.py +++ b/workflow/scripts/merge_fire_peaks.py @@ -124,7 +124,7 @@ def main( logger.setLevel(log_level) inf = io.StringIO(sys.stdin.read()) - df = pl.read_csv(inf, separator="\t", null_values=".") + df = pl.read_csv(inf, separator="\t", null_values=".", schema_overrides={"#chrom": pl.Utf8},) if df.shape[0] == 0: logging.info("No peaks to merge") return 0