Skip to content

Edge weights must not be NaN values. -- Invalid value #204

@fgonzalez3

Description

@fgonzalez3

Hi there! I am running the below Snakemake workflow to bin some short read assemblies. I'm testing my workflow on two samples before I run it on my entire sample set, but I am encountering the error described in the title for one of those samples. Below, I've written out the workflow, in addition to the error. I haven't seen this issue brought up so far, so I'm hoping to get some feedback on where I could be producing the corrupted file in question. I'm happy to provide the preceding steps in the workflow if it would be of any aid. Thank you!

Workflow -

rule semibin2_generate_concatenated_db_spades:
    """
    Generate the concatenated FASTA file required by SemiBin2's multi-sample binning pipeline.

    Every per-sample deduplicated SPAdes assembly is passed to
    `SemiBin2 concatenate_fasta`; the declared output is `concatenated.fa`
    inside `params.outdir`.
    """
    input:
        contigs = expand("results/{genera}/3_dedup_contigs/SPAdes/individual_metagenome_assembly/{sample}/{sample}_DEDUP95.fasta", genera=config["genera"], sample=SAMPLES)
    output:
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db/concatenated.fa"
    params:
        outdir = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db",
        # NOTE(review): not referenced by the shell command below.
        threads = 4
    log:
        stdout = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db/concatenate_fa.out",
        stderr = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db/concatenate_fa.err"
    # The command's output is appended to the declared log files; previously
    # the `log:` paths were declared but nothing was ever written to them.
    shell:
        """
        module unload miniconda
        source activate /vast/palmer/pi/turner/flg9/conda_envs/semibin

        SemiBin2 concatenate_fasta \
        --input-fasta {input.contigs} \
        --output {params.outdir} --compression=none \
        1>> {log.stdout} 2>> {log.stderr}
        """

rule sembin2_align_to_concatenated_db_spades:
    """
    Align the reads of one sample to the concatenated FASTA db, as needed by the SemiBin2 multi-sample pipeline.

    Steps performed by the shell command:
      1. Build a Bowtie2 index of the concatenated FASTA under the per-sample
         output directory (the same db is therefore indexed once per sample).
      2. Align the sample's paired reads, drop records carrying SAM flag 4 or
         2048 (unmapped / supplementary per the SAM flag definitions), and
         write a sorted BAM to the last declared output.
    """
    input:
        db = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db/concatenated.fa",
        r1 = "results/{genera}/1_pre_processing/dedup_reads/{sample}/{sample}_host_removed_dedup_R1.fastq",
        r2 = "results/{genera}/1_pre_processing/dedup_reads/{sample}/{sample}_host_removed_dedup_R2.fastq"
    output:
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.1.bt2",
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.2.bt2",
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.3.bt2",
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.4.bt2",
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.rev.1.bt2",
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}/{sample}_indexed_contig.rev.2.bt2",
        # output[6]: the sorted BAM consumed by the feature-generation rule.
        "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}_aligned_sorted.bam"
    params:
        genera=config["genera"],
        outdir = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}"
    log:
        stdout = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}_aln.out",
        stderr = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}_aln.err"
    # stderr is redirected at every stage of the pipeline: a single trailing
    # `2>>` only applies to the last command (`samtools sort`), which left the
    # bowtie2 alignment summary and any `samtools view` errors out of the log.
    shell:
        """
        module unload miniconda 
        module load Bowtie2/2.5.1-GCC-12.2.0
        module load SAMtools/1.21-GCC-12.2.0

        # 1. Build db index
        bowtie2-build \
        -f {input.db} {params.outdir}/{wildcards.sample}_indexed_contig \
        1>> {log.stdout} 2>> {log.stderr}

        # 2. Align reads back to SemiBin db index
        bowtie2 \
        -x {params.outdir}/{wildcards.sample}_indexed_contig -1 {input.r1} -2 {input.r2} \
        2>> {log.stderr} \
        | samtools view -b -F 4 -F 2048 2>> {log.stderr} \
        | samtools sort -o {output[6]} \
        1>> {log.stdout} 2>> {log.stderr}
        """

rule semibin2_features_and_model_spades:
    """
    Generate SemiBin2 sequence features (data.csv and data_split.csv) for one sample.

    The rule has a `{sample}` wildcard in its outputs, so Snakemake runs it
    once per sample, yet each run invokes `generate_sequence_features_multi`
    on the full concatenated FASTA and on every sample's BAM. Each job
    therefore writes into its own per-sample working directory
    (`params.outdir`) before moving its two files into place. With a shared
    working directory, jobs running at the same time could overwrite or move
    away each other's feature files.

    NOTE(review): every job still recomputes features for all samples; a
    single job producing all samples' outputs would avoid the repeated work.
    """
    input:
        cat_fa = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/generate_concatenated_db/concatenated.fa",
        bams = expand("results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/align_to_concatenated_db/{sample}_aligned_sorted.bam", genera=config["genera"], sample=SAMPLES)
    output:
        split = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/data_split.csv",
        csv = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/data.csv"
    params:
        # Working directory private to this job (one per sample wildcard).
        outdir = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/semibin_work",
        # Final location of this sample's data.csv / data_split.csv.
        outdir2 =  "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}"
    # Declared as a directive (not a param) so the scheduler reserves the cores.
    threads: 4
    log:
        stdout = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/sequence_features.out",
        stderr = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/sequence_features.err"
    shell:
        """
        module unload miniconda
        source activate /vast/palmer/pi/turner/flg9/conda_envs/semibin

        # Generate sequence features data.csv & data_split.csv files
        SemiBin2 generate_sequence_features_multi \
        -i {input.cat_fa} \
        -b {input.bams} \
        -o {params.outdir} \
        -t {threads} \
        1>> {log.stdout} 2>> {log.stderr}

        mv {params.outdir}/samples/{wildcards.sample}_DEDUP95/data.csv {params.outdir2}
        mv {params.outdir}/samples/{wildcards.sample}_DEDUP95/data_split.csv {params.outdir2}
        """

rule semibin2_train_model_spades:
    """
    Train the SemiBin2 self-supervised model on one sample's feature files.

    Runs `SemiBin2 train_self` on the sample's data.csv and data_split.csv;
    the declared output is `model.pt` inside `params.outdir`.
    """
    input:
        split = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/data_split.csv",
        csv = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/data.csv"
    output:
        model = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/train_model/{sample}/model.pt"
    params:
        outdir = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/train_model/{sample}"
    # Declared as a directive (not a param) so the scheduler reserves the cores.
    threads: 4
    log:
        stdout = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/train_model/{sample}/ML_train.out",
        stderr = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/train_model/{sample}/ML_train.err"
    shell:
        """
        module unload miniconda
        source activate /vast/palmer/pi/turner/flg9/conda_envs/semibin

        # Train model
        SemiBin2 train_self \
        --data {input.csv} \
        --data-split {input.split} \
        -o {params.outdir} \
        -t {threads} \
        1>> {log.stdout} 2>> {log.stderr}
        """

checkpoint semibin2_bin_spades:
    """
    Bin contigs using SemiBin's multi-sample binning model for individual binning of samples.
    This method often returns the most complete bins and is most optimized for complex samples.

    Runs `SemiBin2 bin_short` on one sample's contigs with that sample's
    trained model and feature table. Declared as a checkpoint because the
    set of bin files in `output_bins` is only known after the job has run.
    """
    input:
        contigs = "results/{genera}/3_dedup_contigs/SPAdes/individual_metagenome_assembly/{sample}/{sample}_DEDUP95.fasta",
        csv = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/features_and_model/{sample}/data.csv",
        model = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/train_model/{sample}/model.pt"
    output:
        outdir = directory("results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}/output_bins"),
        check = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}/contig_bins.tsv"
    params:
        # Directory handed to SemiBin2 via -o; both declared outputs live under it.
        base = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}",
        outdir = "results/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}/output_bins",
        minlen = "1500"
    # Declared as a directive (not a param) so the scheduler reserves the cores.
    threads: 4
    log:
        stdout = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}/bin.out",
        stderr = "logs/{genera}/6_binning/semibin2/SPAdes_individual_assembly/binning/{sample}/bin.err"
    shell:
        """
        module unload miniconda
        source activate /vast/palmer/pi/turner/flg9/conda_envs/semibin

        # Bin
        SemiBin2 bin_short \
        -i {input.contigs} \
        --model {input.model} \
        --data {input.csv} \
        -o {params.base} \
        -t {threads} \
        --min-len={params.minlen} \
        1>> {log.stdout} 2>> {log.stderr}
        """

def get_semibin2_bin_spades(wc):
    """Return the sorted paths of the gzipped bin FASTAs of one binning job.

    Looks up the `semibin2_bin_spades` checkpoint for the `genera` and
    `sample` values carried by ``wc`` and lists every ``*.fa.gz`` file found
    directly inside that checkpoint's ``outdir`` output.

    Args:
        wc: Object exposing ``genera`` and ``sample`` attributes.

    Returns:
        list[str]: Matching file paths in ascending order (may be empty).
    """
    import glob
    import os

    checkpoint_job = checkpoints.semibin2_bin_spades.get(
        genera=wc.genera,
        sample=wc.sample,
    )
    pattern = os.path.join(checkpoint_job.output.outdir, "*.fa.gz")
    matches = glob.glob(pattern)
    matches.sort()
    return matches

Error -

2025-10-09 13:20:22 login2.mccleary.ycrc.yale.edu SemiBin2[1617592] INFO Running SemiBin2 version 2.2.0
2025-10-09 13:20:24 login2.mccleary.ycrc.yale.edu SemiBin2[1617592] WARNING Did not detect GPU or CUDA was not installed/supported, using CPU.
2025-10-09 13:20:24 login2.mccleary.ycrc.yale.edu SemiBin2[1617592] INFO Start binning.
multiprocessing.pool.RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
                    ~~~~^^^^^^^^^^^^^^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/cluster.py", line 14, in run_infomap1
    return g.community_infomap(edge_weights=edge_weights, vertex_weights=vertex_weights, trials=trials)
           ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/igraph/community.py", line 56, in _community_infomap
    membership, codelength = GraphBase.community_infomap(
                             ~~~~~~~~~~~~~~~~~~~~~~~~~~~^
        graph, edge_weights, vertex_weights, trials
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
igraph._igraph.InternalError: Error at src/community/infomap/infomap.cc:263: Edge weights must not be NaN values. -- Invalid value
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/bin/SemiBin2", line 10, in <module>
    sys.exit(main2())
             ~~~~~^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/main.py", line 1605, in main2
    binning_short(logger, args.data, args.minfasta_kb * 1000, binned_length,
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            environment=args.environment, contig_dict=contig_dict,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            model_path=args.model_path, output=args.output,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            device=device, args=args)
            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/main.py", line 1222, in binning_short
    cluster(
    ~~~~~~~^
        logger,
        ^^^^^^^
    ...<8 lines>...
        binned_length=binned_length,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        minfasta=minfasta)
        ^^^^^^^^^^^^^^^^^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/cluster.py", line 289, in cluster
    embedding, contig_labels = run_embed_infomap(logger, model, data,
                               ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
            device=device, max_edges=args.max_edges, max_node=args.max_node,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            is_combined=is_combined, n_sample=n_sample,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            contig_dict=contig_dict, num_process=args.num_process,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            random_seed=args.random_seed)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/cluster.py", line 182, in run_embed_infomap
    result = run_infomap(g,
                edge_weights=edge_weights,
                vertex_weights=length_weight,
                num_process=num_process)
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/site-packages/SemiBin/cluster.py", line 26, in run_infomap
    rs = [r.get() for r in rs]
          ~~~~~^^
  File "/vast/palmer/pi/turner/flg9/conda_envs/semibin/lib/python3.13/multiprocessing/pool.py", line 774, in get
    raise self._value
igraph._igraph.InternalError: Error at src/community/infomap/infomap.cc:263: Edge weights must not be NaN values. -- Invalid value

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions