Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ __pycache__
.DS_Store
secrets
scratch
/tests/test_data
/tests/test_msm_home
34 changes: 34 additions & 0 deletions data_types/amplicon.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Data type definitions for amplicon (ASV) analysis.
# Each entry under `types` carries a `properties` block whose `_` key holds a
# human-readable description.
# NOTE(review): leading indentation is missing in this copy, so whether `ext`
# sits inside `properties` or beside it cannot be confirmed from here.
types:
# ASV sequences and abundance table.
asv_seqs:
properties:
_: ASV sequences
Format: FASTA
Data: 16S rRNA amplicon sequence variants
ext: fasta
asv_table:
properties:
_: ASV abundance table
Data: sample by ASV count matrix
ext: tsv
# BLASTn mapping of ASVs to contigs, plus the identity threshold it uses.
asv_contig_map:
properties:
_: ASV to contig mapping via BLASTn
Format: BLAST6
ext: tsv
# No `ext` is declared for this type.
blast_identity_threshold:
properties:
_: Minimum percent identity for BLAST mapping (e.g., 97 or 100)
# SILVA reference data; neither of the next two types declares an `ext`.
silva_source:
properties:
_: SILVA SSURef NR99 database source URL
silva_db:
properties:
_: SILVA SSURef NR99 reference database formatted for VSEARCH
silva_nb_classifier:
properties:
_: Pre-trained QIIME2 naive Bayes classifier for SILVA
ext: qza
# Taxonomy assigned to ASVs.
asv_taxonomy:
properties:
_: ASV taxonomy classifications
ext: tsv
73 changes: 73 additions & 0 deletions data_types/annotation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Data type definitions for functional annotation outputs and the reference
# databases they depend on. The `_` key in each `properties` block is the
# human-readable description.
# NOTE(review): leading indentation is missing in this copy, so whether `ext`
# sits inside `properties` or beside it cannot be confirmed from here.
types:
# InterProScan
interproscan_json:
properties:
_: InterProScan annotations in JSON format
Format: JSON
ext: json
interproscan_gff:
properties:
_: InterProScan annotations in GFF3 format
Format: GFF3
ext: gff3

# KofamScan
# NOTE(review): `Format` is TSV but `ext` is txt — confirm this is intended.
kofamscan_results:
properties:
_: KEGG KO assignments from KofamScan
Format: TSV
ext: txt

# ProteinBERT
proteinbert_embeddings:
properties:
_: ProteinBERT embedding vectors
Format: Parquet
ext: parquet
proteinbert_index:
properties:
_: ProteinBERT sequence index
Format: CSV
ext: csv

# DeepEC
deepec_predictions:
properties:
_: DeepEC EC number predictions
ext: tsv

# DIAMOND UniRef50
diamond_uniref50_results:
properties:
_: DIAMOND alignment to UniRef50
Format: BLAST6
ext: tsv

# Reference database types
uniref50_diamond_db:
properties:
_: DIAMOND-formatted UniRef50 database
ext: dmnd
# Marker type; no `ext` is declared.
uniref50_source:
properties:
_: Marker for UniRef50 database download

# KofamScan databases
# The profile directory and the source marker declare no `ext`.
kofamscan_profiles:
properties:
_: KofamScan HMM profile directory containing .hmm files
kofamscan_ko_list:
properties:
_: KofamScan KO list file
ext: tsv
kofamscan_source:
properties:
_: Marker for KofamScan database download

# InterProScan data
# Neither type below declares an `ext`.
interproscan_data:
properties:
_: InterProScan data directory
interproscan_source:
properties:
_: Marker for InterProScan data location
48 changes: 48 additions & 0 deletions data_types/binning.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Data type definitions for metagenomic binning.
# Two base types (`bin_fasta`, `contig_to_bin_table`) are extended once per
# binning method; each extension only adds a `method` property.
# NOTE(review): leading indentation is missing in this copy, so whether `ext`
# sits inside `properties` or beside it cannot be confirmed from here.
types:
# Sample identifier; no `ext` is declared.
metagenome_sample:
properties:
_: a metagenome sample identifier for binning

# Base types shared by every binning method.
bin_fasta:
properties:
_: a single metagenomic bin FASTA file
Format: FASTA
Data: DNA sequence
ext: fna
contig_to_bin_table:
properties:
_: contig to bin assignment table
ext: tsv

# MetaBAT2 variants of the base types.
metabat2_bin_fasta:
extends:
- bin_fasta
properties:
method: metabat2
metabat2_contig_to_bin_table:
extends:
- contig_to_bin_table
properties:
method: metabat2

# SemiBin2 variants of the base types.
semibin2_bin_fasta:
extends:
- bin_fasta
properties:
method: semibin2
semibin2_contig_to_bin_table:
extends:
- contig_to_bin_table
properties:
method: semibin2

# COMEBin variants of the base types.
comebin_bin_fasta:
extends:
- bin_fasta
properties:
method: comebin
comebin_contig_to_bin_table:
extends:
- contig_to_bin_table
properties:
method: comebin
36 changes: 34 additions & 2 deletions data_types/containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,24 @@ types:
# Container types for genome quality checking and binning tools.
# Each entry extends `container` and lists names under `provides`
# (presumably the commands available inside the image — confirm with the
# consumer of this file).
checkm.oci:
  extends: container
  properties:
    # `provides` is declared exactly once; a repeated key would be a
    # duplicate-key error or a silent last-wins override, depending on parser.
    provides:
      - checkm
comebin.oci:
  extends: container
  properties:
    provides:
      - run_comebin.sh
semibin.oci:
  extends: container
  properties:
    provides:
      - SemiBin2
metabat2.oci:
  extends: container
  properties:
    provides:
      - metabat2
      - jgi_summarize_bam_contig_depths
metabuli.oci:
extends: container
properties:
Expand All @@ -122,8 +138,24 @@ types:
# Container types for genome comparison and amplicon tooling.
# Each entry extends `container` and lists names under `provides`
# (presumably the commands available inside the image — confirm with the
# consumer of this file).
fastani.oci:
  extends: container
  properties:
    # `provides` is declared exactly once; a repeated key would be a
    # duplicate-key error or a silent last-wins override, depending on parser.
    provides:
      - fastani
blast.oci:
  extends: container
  properties:
    provides:
      - blastn
      - makeblastdb
vsearch.oci:
  extends: container
  properties:
    provides:
      - vsearch
qiime2.oci:
  extends: container
  properties:
    provides:
      - qiime

ppanggolin.oci:
extends: container
Expand Down
4 changes: 4 additions & 0 deletions data_types/lib.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,8 @@ types:
properties:
src: pangenome_heatmap
usage: command line
# Library entry for response_surface.py: `src` is `response_surface` and
# `usage` is `command line`, the same two properties as the entry before it.
response_surface.py:
properties:
src: response_surface
usage: command line

54 changes: 54 additions & 0 deletions data_types/media_optimization.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Data type definitions for media optimization: CSV tables followed by SVG
# plots. Every entry declares `ext`, `format` and `atomic_description`.
# NOTE(review): the other data_types files shown alongside this one use
# `Format` (capitalised) and `_` for the description, whereas this file uses
# `format` and `atomic_description` — confirm which keys the consumer reads.
types:
# --- CSV tables ---
growth_data:
properties:
ext: csv
format: CSV
atomic_description: media optimization growth curve data

response_surface_coefficients:
properties:
ext: csv
format: CSV
atomic_description: polynomial response surface model coefficients

model_suggestions:
properties:
ext: csv
format: CSV
atomic_description: optimized media composition suggestions

crashed_cultures:
properties:
ext: csv
format: CSV
atomic_description: table of crashed/outlier culture replicates per sample

# --- SVG plots ---
growth_characteristics_plot:
properties:
ext: svg
format: SVG
atomic_description: growth curve panel plot with logistic fits

factor_importance_plot:
properties:
ext: svg
format: SVG
atomic_description: top factor importance bar chart

model_suggestions_plot:
properties:
ext: svg
format: SVG
atomic_description: model suggestions vs best tested scatter

response_surface_1d_plot:
properties:
ext: svg
format: SVG
atomic_description: 1D cross-section response surface plot

response_surface_2d_plot:
properties:
ext: svg
format: SVG
atomic_description: 2D heatmap response surface plot
23 changes: 23 additions & 0 deletions dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ case $1 in
--transforms $HERE/transforms/*
;;
###################################################
# test
# Every arm below invokes pytest with paths relative to the current working
# directory, and passes -v --ignore=tests/cache.
# NOTE(review): despite its name, --test-binning globs every tests/test_*.py
# module rather than only the binning workflow tests — confirm this is intended.
--test-binning)
    pytest tests/test_*.py -v --ignore=tests/cache
;;
# The three end-to-end arms select a single test by node id; -s and
# --log-cli-level=INFO are passed so output and INFO logs are shown live.
--test-comebin)
    pytest tests/test_binning_workflow.py::TestBinningWorkflowExecution::test_comebin_e2e \
        -v --ignore=tests/cache \
        -s --log-cli-level=INFO
;;
--test-semibin2)
    pytest tests/test_binning_workflow.py::TestBinningWorkflowExecution::test_semibin2_e2e \
        -v --ignore=tests/cache \
        -s --log-cli-level=INFO
;;
--test-metabat2)
    pytest tests/test_binning_workflow.py::TestBinningWorkflowExecution::test_metabat2_e2e \
        -v --ignore=tests/cache \
        -s --log-cli-level=INFO
;;
# Uses the same -v --ignore=tests/cache flags as the other test arms.
--test-annotation)
    pytest tests/test_annotation_workflow.py -v --ignore=tests/cache
;;
###################################################
*)
echo "bad option"
echo $1
Expand Down
26 changes: 26 additions & 0 deletions resources/containers/_metadata/index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@ manifest:
type: containers::bbtools.oci
bedtools.oci:
type: containers::bedtools.oci
blast.oci:
type: containers::blast.oci
checkm.oci:
type: containers::checkm.oci
comebin.oci:
type: containers::comebin.oci
deepec.oci:
type: containers::deepec.oci
diamond.oci:
type: containers::diamond.oci
fastani.oci:
type: containers::fastani.oci
fastp.oci:
Expand All @@ -23,8 +31,14 @@ manifest:
type: containers::hifiasm-meta.oci
hifiasm.oci:
type: containers::hifiasm.oci
interproscan.oci:
type: containers::interproscan.oci
kofamscan.oci:
type: containers::kofamscan.oci
megahit.oci:
type: containers::megahit.oci
metabat2.oci:
type: containers::metabat2.oci
metabuli.oci:
type: containers::metabuli.oci
miniasm.oci:
Expand All @@ -35,14 +49,26 @@ manifest:
type: containers::nanoplot.oci
ncbi-datasets.oci:
type: containers::ncbi-datasets.oci
polars.oci:
type: containers::polars.oci
ppanggolin.oci:
type: containers::ppanggolin.oci
pprodigal.oci:
type: containers::pprodigal.oci
proteinbert.oci:
type: containers::proteinbert.oci
python_for_data_science.oci:
type: containers::python_for_data_science.oci
qiime2.oci:
type: containers::qiime2.oci
samtools.oci:
type: containers::samtools.oci
semibin.oci:
type: containers::semibin.oci
seqkit.oci:
type: containers::seqkit.oci
sra-tools.oci:
type: containers::sra-tools.oci
vsearch.oci:
type: containers::vsearch.oci
schema: v1
Loading