Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# CI for the Snakemake workflow: lint the test Snakefile, then execute it
# from the .test/ directory.
name: Snakemake Testing

on:
  push:
    branches: [master]
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # checkout@v2 targets the retired Node 12 action runtime; v4 is a
      # maintained major version with the same default behaviour here.
      - uses: actions/checkout@v4

      # Static checks only; no jobs are executed in this step.
      - name: Linting
        uses: snakemake/snakemake-github-action@v1
        with:
          directory: '.test'
          snakefile: '.test/Snakefile'
          args: '--lint'

      # Real run of the test Snakefile on a single core.
      - name: Testing
        uses: snakemake/snakemake-github-action@v1
        with:
          directory: '.test'
          snakefile: '.test/Snakefile'
          args: '--cores 1 --use-conda --conda-cleanup-pkgs cache'
          stagein: ''  # additional preliminary commands to run (can be multiline)
          show-disk-usage-on-error: true
26 changes: 26 additions & 0 deletions .test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Test entry point: sets the container image and config file, includes the
# shared helpers, declares the `targets` rule, then includes the rule modules
# of the main workflow.
container: "docker://continuumio/miniconda3"
configfile: "config/config.yaml"
include: "rules/common.smk"


# Requests one filtered VCF plus its .tbi index for every entry in `patients`.
# NOTE(review): `patients` is not defined in this file; presumably it comes
# from rules/common.smk - confirm.
rule targets:
    input:
        vcf=expand(
            os.path.join(
                config["output_folder"], "vcfs", "filtered", "{patient}.filtered.vcf.gz"
            ),
            patient=patients,
        ),
        idx=expand(
            os.path.join(
                config["output_folder"], "vcfs", "filtered", "{patient}.filtered.vcf.gz.tbi"
            ),
            patient=patients,
        ),


# Rule modules are included after `targets`, in the same order as before.
include: "../workflow/rules/alignment.smk"
include: "../workflow/rules/preprocessing.smk"
include: "../workflow/rules/somatic.smk"
49 changes: 49 additions & 0 deletions .test/config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Sequencing type: one of "WES" or "WGS".
sequencing_type: 'WES'
tumor_only: true
viral_integrated: true
virus_name: 'MCPyV'

# Paths to relevant input files.
units: 'units.csv'
patients: 'patients.csv'

# Output, temporary and log locations.
output_folder: 'output/'
tmp_dir: 'tmp'
log_folder: 'logs/'

# Reference resources.
resources:
  assembly: 'GRCh38'
  genomic_regions: 'ref/hg38_chr22_targets.bed'
  # NOTE(review): the file name is spelled "MPCyV" while virus_name above is
  # "MCPyV"; the path is kept as is because it matches the committed file.
  viral_genome: 'ref/MPCyV_genome.fa'
  reference_fasta: 'ref/GRCh38_chr22.fasta'
  dbsnps: 'ref/dbsnp_146.hg38.vcf.gz'
  known-indels: 'ref/Homo_sapiens_assembly38.known_indels.vcf.gz'
  1000g: 'ref/1000G_phase1.snps.high_confidence.hg38.vcf.gz'
  gnomad: 'ref/af-only-gnomad.hg38.vcf.gz'
  contamination: 'ref/small_exac_common_3.hg38.vcf.gz'
  PoN: 'ref/1000g_pon_hg38.vcf.gz'
  vep_cache: ''

# Container images.
containers:
  ctgflow_core: 'docker://danilotat/ctgflow_core'
  qc: ''
  deepvariant: 'docker://google/deepvariant:1.8.0'
  deepsomatic: 'docker://google/deepsomatic:1.8.0'

# Per-tool parameters. The folded scalars below parse to single-line strings
# with the options separated by one space.
params:
  mutect2:
    num_workers: 10
    args: >-
      --max-population-af 0.05
      --genotype-germline-sites
      --linked-de-bruijn-graph
      --dont-use-soft-clipped-bases
    filtering: >-
      --max-events-in-region 4
      --unique-alt-read-count 2
      --min-reads-per-strand 1
      --min-allele-fraction 0.001
  vep:
    assembly: 'GRCh38'
    threads: 4
    extra: >-
      --pick --af --check_existing --coding_only
      --format vcf --vcf --symbol --terms SO
      --no_intergenic --tsl


Binary file added .test/data/pat01_1.fastq.gz
Binary file not shown.
Binary file added .test/data/pat01_2.fastq.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions .test/patients.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
patient
pat01
Binary file not shown.
Binary file not shown.
Binary file added .test/ref/1000g_pon_hg38.vcf.gz
Binary file not shown.
Binary file added .test/ref/1000g_pon_hg38.vcf.gz.tbi
Binary file not shown.
2 changes: 2 additions & 0 deletions .test/ref/GRCh38_chr22.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@HD VN:1.6
@SQ SN:chr22 LN:50818468 M5:bd85195c4feed92050f02aabe5664435 UR:file:/Volumes/HD2/home/danilo/ctgflow/.test/ref/GRCh38_chr22.fasta
725,980 changes: 725,980 additions & 0 deletions .test/ref/GRCh38_chr22.fasta

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .test/ref/GRCh38_chr22.fasta.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr22 50818468 7 70 71
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions .test/ref/MPCyV_genome.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@HD VN:1.6
@SQ SN:NC_010277.2 LN:5387 M5:203e8a9fe025df02a3869bf2c473977e UR:file:/Volumes/HD2/home/danilo/ctgflow/.test/ref/MPCyV_genome.fa
78 changes: 78 additions & 0 deletions .test/ref/MPCyV_genome.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
>NC_010277.2 Merkel cell polyomavirus isolate R17b, complete genome
CTTGTCTATATGCAGAAGGAGTTTGCAGAAAGAGCAGAGGAGCAAATGAGCTACCTCACTAAGGAGTGGT
TTTTATACTGCAGTTTCCCGCCCTTGGGATCTGCCCTTAGATACTGCCTTTTTTGCTAATTAAGCCTCTT
AAGCCTCAGAGGCCTCTCTCTTTTTTTTCCAGAGGCCTCGGAGGCTAGGAGCCCCAAGCCTCTGCCAACT
TGAAAAAAAAAAGTCACCTAGGCAGCCAAGTTGTGGTTACATGATTGAACTTTTATTGCTGCAGGGTTTC
TGGCATTGACTCATTTCCTGGAGAGGCGGAGTTTGACTGATAAACAAAACTTTTTTTCTTTCTGTTTGGG
AGGGAGACGGAAGACTCTTAACTTTTTTTCAACAAGGGAGGCCCGGAGGCTTTTTTTTCTCTTACAAAGG
GAGGAGGACATTAAAAGAGTAAGTATCCTTATTTATTTTTCAGGATGGGGGGCATCATCACACTGCTGGC
CAATATTGGTGAAATTGCTACTGAACTAAGTGCCACCACAGGAGTAACTTTGGAAGCTATTCTTACAGGA
GAAGCTTTAGCAGCTTTGGAAGCAGAGATCTCCAGTTTAATGACAATTGAGGGTATTTCTGGCATTGAGG
CTTTAGCTCAACTTGGGTTCACAGCTGAACAGTTTTCAAATTTCTCATTAGTGGCTTCTTTGGTTAACCA
AGGTTTAACTTATGGCTTCATTCTCCAAACTGTTAGTGGTATAGGCTCTCTAATAACTGTGGGGGTGAGG
TTGTCACGCGAGCAAGTGTCACTTGTAAAGAGGGATGTTTCGTGGGTAGGTAGTAATGAGGTTTTGAGGC
ATGCACTTATGGCCTTTAGCCTAGATCCTCTGCAGTGGGAAAATAGCTTGCTGCATTCTGTGGGGCAAGA
TATTTTTAATTCTTTATCTCCTACCTCTAGGCTGCAGATACAATCAAACCTAGTGAATCTGATACTAAAT
AGCCGGTGGGTCTTTCAGACAACTGCTTCTCAGAATCAGGGCCTTTTATCAGGAGAGGCTATATTAATTC
CTGAACATATAGGAGGAACTCTGCAGCAGCAAACTCCAGATTGGCTTCTTCCTCTGGTACTAGGCCTTAG
TGGATATATTTCTCCTGAATTACAAGTAATTGAAGATGGCACCAAAAAGAAAAGCATCATCCACCTGTAA
AACACCCAAAAGACAATGTATACCTAAGCCGGGATGCTGCCCTAATGTTGCCTCAGTTCCAAAACTGCTT
GTTAAAGGAGGAGTGGAAGTATTATCTGTGGTTACTGGAGAAGATAGCATTACCCAAATTGAGTTGTATT
TGAATCCAAGAATGGGAGTTAATTCCCCTGATCTTCCTACTACTTCAAACTGGTATACTTATACTTATGA
CCTGCAGCCAAAGGGATCATCTCCAGATCAGCCCATCAAGGAAAATTTGCCAGCTTACAGTGTGGCAAGA
GTGTCTCTGCCAATGCTAAATGAGGATATTACCTGTGACACATTGCAGATGTGGGAGGCAATATCTGTTA
AAACAGAAGTAGTTGGAATAAGTTCTTTAATTAATGTTCATTATTGGGACATGAAAAGAGTTCATGATTA
TGGTGCTGGTATTCCTGTGTCAGGGGTAAATTACCATATGTTTGCCATTGGGGGAGAACCTCTGGATTTG
CAAGGCCTAGTTTTAGATTACCAGACTGAGTATCCAAAAACTACAAATGGTGGGCCTATTACAATTGAAA
CTGTATTGGGAAGAAAAATGACACCTAAAAATCAGGGCCTAGATCCACAAGCTAAAGCAAAATTAGATAA
AGATGGAAATTATCCTATAGAAGTATGGTGTCCTGATCCTTCTAAAAATGAAAACAGTAGATACTATGGG
TCTATTCAGACAGGCTCTCAGACTCCTACAGTTCTTCAATTTAGTAATACTCTAACTACTGTCCTTTTAG
ATGAGAATGGAGTGGGCCCTCTATGCAAAGGAGATGGCCTATTTATTAGCTGTGCAGACATAGTGGGGTT
TCTGTTTAAAACCAGTGGAAAAATGGCTCTTCATGGGTTGCCTAGATATTTTAATGTTACTTTGAGAAAA
AGATGGGTGAAAAACCCCTACCCAGTAGTTAATTTAATAAACTCACTCTTCAGCAACTTAATGCCAAAAG
TGTCAGGCCAACCTATGGAAGGAAAAGATAATCAGGTAGAAGAGGTTAGAATATATGAGGGGTCAGAACA
ATTACCTGGTGATCCTGATATTGTCAGATTTTTAGATAAATTTGGGCAGGAGAAAACTGTTTACCCAAAG
CCCTCTGTTGCCCCAGCAGCAGTAACATTCCAAAGTAATCAGCAGGATAAGGGCAAGGCGCCACTGAAAG
GACCTCAAAAGGCCTCTCAAAAAGAAAGCCAAACACAAGAATTATGAGAATTATTTCATGCATTCCTATT
CAGTTAAGTAGGCCCCAGAAAAACAAACACAGGAAATATGAAGCAGATGCCTTTATTGAGAAAAAGTACC
AGAATCTTGGGTTTCTTCAGTTTCCTCAGGGCCCTCTTCCTCAATAAGAATATTGAGCAGAGGGTCCTGA
CCAGCTTCTACATTTTCTATCATTTGACAAAATTTACCATATGATATTTCACTCTGTAAAATTTGCTTCC
AGTTTTTAATTTCTTCTTGTAAGCAAGGCTTAAAGGTTGTATCAGGCAAGCACCAAATAAGACAAAGCAA
TAAAGTGGTTCCACTTTGAAGAATTCTTCTTTTTCTTATTTCCATGTTCTGATCCAGGGAATCTCTTAGA
TTTGCCTTTGGGGAAAAGTGTAAAGTATAACTAAATCTTGCTATTAATGTTTTGGGAATAAAATAATCAT
TAGCAGTAACAATACAAGGAGGAAAAATCTGATGCTTTTTATTCACATGCTTCTTCTCTAAGCTTACAGC
TACAGCACCATCTAGATGATCTCTTAAGTTATCAAGGTTATTTATTCCTTGCCCTGGTTGCAGATCTTTA
TTTAGGCTATTTTGCCCTTTCACATCCTCAAAAACAACCATAAATTTATCCAAAGCACATCCTAGTTCAA
AAGGCAGTTTATCAGATGGACAGTTTATATTCAAGGCCTTCCCTTCTAGCAAATCTATTAAGGCTGCAGC
AAAGCTTGTTTTTCCACTGTTAATAGGCCCTTTAAACCAAATGTTTCTATACTTAGGTATATTCTCTGTT
AATAATTGAATAATTTTCTGCAGCTTCTTTTCAAACTCTTCAAATAAGCAGCAGTACCAGGCCACACCAC
CCATATAATACAGTAGATCTATTGTATCTAAATCTCTTAATCTCTCTAGGTGCTTCTTAAACTTCTTACA
TAGCATTTCTGTCCTGGTCATTTCCAGCATCTCTAACCTCCTTTTGGCTAGAACAGTGTCTGCGGCTTGT
TGGCAAATGGTTTTCTGAGATTTAGATTCATAAAATAGCTTAGCATTAGAATGATGAGCCTCATGAGCCT
TGTGAGGTTTGAGGCGAGATCTGTTTTCACACTTTTGGCAAGGAAATGGTTTTGCAAAGTCTAGATAATG
GGCTAAGATAATAAAGTGGTCGTCTAGCTCATATTCACAAGCAAATTCAGCAACTAAATTCCAATTACAG
CTGGCCTCTTTTTCTTTTTCTTGAAATTCATAATTGAGCAGTGGCTTATTCTCTTGCAGTAATTTGTAAG
GGGGCTTGCATAAATTATTATACATTTCAGGCATCTTATTCACTCCTTTACAAATTAAAAAGCTTATAGT
GCAGAAGGTAGAGCAAAAATTCTTAATAGCAGATACTCTATGCTTTGATAAAGTTATAAACAATAAAATA
CATCCTAATTCACAGGCATGCCTGCTTTTAAAATCAACTTTAAATTTCTCAATCTTATCATATAACTCTA
TAGCTTTATCAGAAGTAGTATAAATGGCAAAACAACTTACTGTTTTATTACTATATACAGCATGGCTAAG
ATAATCAGAAAGATCAATAGGAAAATCAGTAGGAACAGGAGTTTCTCTGTTCTTTTTTGGCTTTGGTGGA
GTGCTTGTAAAACTTGCTGAACTAGCAGAGCTTGCAGAGCTTCGGGACCCCCCAAATTTTCGCTTTCTTG
AGAATGGAGGAGGGGTCTTCGGGGTGGTGAAGGAGGAGGATCTGTATTCCTCATCTGTAAACTGAGATGA
CGAGGCCTCCTCGGCAGAGGAAGACGGGGGCTGCCGGGGCGAGCTTCTTGAGGAGGGGGGCTCCTCAGGC
TCCTCAGAGGACGAGGGAGGCTCAGGGGAGGAAAGTGATTCATCGCAGAAGAGATCCTCCCAGGTGCCAT
CCGTTCTGGAAGAATTTCTAGGTACACTGGTTCCATTGGGTGTGCTGGATTCTCTTCCTGAATTGGTGGT
CTCCTCTCTGCTACTGGATCCAGAGGATGAGGTGGGTTCCTCATGGTGTTCGGGAGGTATATCGGGTCCT
CTGGACTGGGAGTCTGAAGCCTGGGACGCTGAGAAGGACCCATACCCAGAGGAAGAGCTCTGGCTGTGGG
GTGGTGAGCTTCCACTGGGGGCTCCCCTGGATGCATTGGAGGAAGGCTTTCTGGATCTTGAGTTGGTCCC
GTGTGGATTGGGCCCATATTCGTATGCCTTCCCGAAGCTGAATCCTCCTGATCTCCACCATTCTTTGAAT
TTAGTGGTCCCATATATAGGGGCCTCGTCAACCTAGATGGGAAAGTACAGAAAATCTGTCATAAATAACC
TTTCTTTGATATTTTGCCTTATAGACTTTTCCATATCTAATACTTACAGAGGAAGGAAGTAGGAGTCTAG
AAAAGGTGCAGATGCAGTAAGCAGTAGTCAGTTTCTTCTAAAGTTTTTTGCCACCAGTCAAAACTTTCCC
AAGTAGGAGGAAATCCAAACCAAAGAATAAAGCACTGATAGCAAAAACACTCTCCCCACGTCAGACAGTT
TTTTTGCTTTAAAGTTTTTAGACTACAATGCTGGCGAGACAACTTACAGCTAATACAAGCGCACTTAGAA
TCTCTAAGTTGCTTAAGCATGCACCCAGGACCTCTGCAAAATCTAGCATTATATCCACTTTGCATATAAT
CCTTTAAAGTTCCATATTCTTCCCAAGGAAATTTTGTACTGACCTCATCAAACATAGAGAAGTCACTTCT
GAGCTTGTGGATATTTTGCTGGAATTTGCTCCAAAGGGTGTTCAATTCCATCATTATAACAGGATTTCCC
CCTTTATCAGGGTGATGCTTTAAGCAGCTTCTTTTGAAAGCAGCTTTCATCAGAGGGATGTTGCCATAAC
AATTAGGAGCAATCTCTAAAAGCTTGCAGAGAGCCTCTCTTTCTTTCCTATTTAGGACTAAATCCAT
Binary file added .test/ref/af-only-gnomad.hg38.vcf.gz
Binary file not shown.
Binary file added .test/ref/af-only-gnomad.hg38.vcf.gz.tbi
Binary file not shown.
Binary file added .test/ref/dbsnp_146.hg38.vcf.gz
Binary file not shown.
Binary file added .test/ref/dbsnp_146.hg38.vcf.gz.tbi
Binary file not shown.
Loading