diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 1520b608..b43d8daf 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -22,7 +22,7 @@ version 1.0 task ExtractSigPredictHRD { input { - String outputDir = "." + String outputDir = "./chord" String sampleName File snvIndelVcf File snvIndelVcfIndex @@ -36,8 +36,11 @@ task ExtractSigPredictHRD { } command { + set -e + mkdir -p ~{outputDir} + cd ~{outputDir} extractSigPredictHRD.R \ - ~{outputDir} \ + . \ ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ diff --git a/fastp.wdl b/fastp.wdl index 9849738b..7df0a8f3 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -42,7 +42,7 @@ task Fastp { Int threads = 4 String memory = "50GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / select_first([effectiveSplit, threads])) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" Int? noneInt diff --git a/gridss.wdl b/gridss.wdl index 5aca3825..c9ba9bac 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -26,15 +26,12 @@ task AnnotateInsertedSequence { input { File inputVcf String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceImg + BwaIndex viralReferenceBwaIndex Int threads = 8 String javaXmx = "8G" String memory = "9GiB" - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 120 } @@ -42,7 +39,7 @@ task AnnotateInsertedSequence { set -e _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ + REFERENCE_SEQUENCE=~{viralReferenceBwaIndex.fastaFile} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ ALIGNMENT=APPEND \ @@ -65,10 +62,7 @@ task AnnotateInsertedSequence { parameter_meta { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The 
path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + viralReferenceBwaIndex: {description: "The BWA index of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -269,7 +263,7 @@ task GRIDSS { Int nonJvmMemoryGb = 10 Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" } command { @@ -305,7 +299,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -345,14 +339,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25GiB" Int threads = 8 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 1440 } command { gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ -w . 
\ -t ~{threads} \ ~{gridssVcf} @@ -382,6 +376,83 @@ task GridssAnnotateVcfRepeatmasker { } } +task GridssSvPrep { + input { + Array[String]+ tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[File]+ tumorFilteredBam + Array[File]+ tumorFilteredBai + BwaIndex reference + File blacklistBed + File gridssProperties + + String? normalLabel + File? normalBam + File? normalBai + File? normalFilteredBam + File? normalFilteredBai + String outputPath = "gridss.vcf.gz" + + Int jvmHeapSizeGb = 48 + Int nonJvmMemoryGb = 10 + Int threads = 10 + Int timeMinutes = ceil(7200 / threads) + 1800 + String dockerImage = "quay.io/biowdl/gridss@sha256:f70696fda4b6f2612b21539d49986cf31bee7542a9eb0269a9f718f99df3fb2a" + } + + command { + gridss_sv-prep \ + --steps all \ + --output ~{outputPath} \ + --workingdir . \ + --reference ~{reference.fastaFile} \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ + --blacklist ~{blacklistBed} \ + --configuration ~{gridssProperties} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ + --bams ~{normalBam}~{true="," false="" defined(normalBam)}~{sep="," tumorBam} \ + --filtered_bams ~{normalFilteredBam}~{true="," false="" defined(normalFilteredBam)}~{sep="," tumorFilteredBam} \ + --jvmheap ~{jvmHeapSizeGb}G \ + --threads ~{threads} + } + + output { + File vcf = outputPath + File vcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + tumorBam: {description: "The input BAM file. 
This should be the tumor/case sample in case of a paired analysis.", category: "required"} + tumorBai: {description: "The index for tumorBam.", category: "required"} + tumorFilteredBam: {description: "The input BAM file preprocessed by hmftools' sv-prep.", category: "required"} + tumorFilteredBai: {description: "The index for tumorFilteredBam.", category: "required"} + tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPath: {description: "The path for the output VCF file.", category: "common"} + normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} + normalBai: {description: "The index for normalBam.", category: "advanced"} + normalFilteredBam: {description: "The BAM file for the normal control sample preprocessed by hmftools' sv-prep.", category: "advanced"} + normalFilteredBai: {description: "The index for normalFilteredBam.", category: "advanced"} + normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blacklisted regions.", category: "required"} + gridssProperties: {description: "A properties file for gridss.", category: "required"} + + threads: {description: "The number of the threads to use.", category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task.
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task SomaticFilter { input { File vcfFile @@ -444,13 +515,13 @@ task Virusbreakend { File referenceFasta File referenceFastaFai File referenceFastaDict - File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75GiB" + Int extraMemoryGB = 10 + Int gridssMemoryGB = 60 Int threads = 12 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 320 } @@ -463,8 +534,9 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ -t ~{threads} \ + --gridssargs '--jvmheap ~{gridssMemoryGB}G' \ ~{bam} } @@ -475,7 +547,7 @@ task Virusbreakend { runtime { cpu: threads - memory: memory + memory: "~{gridssMemoryGB + extraMemoryGB}GiB" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -484,10 +556,10 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} - referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + extraMemoryGB: {description: "Extra memory needed for the job in GB.", category: "advanced"} + gridssMemoryGB: {description: "Memory assigned to GRIDSS in GB.", category: "advanced"} threads: {description: "The number 
of the threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", diff --git a/hmftools.wdl b/hmftools.wdl index c27630a1..46422f15 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String referenceName - File referenceBam - File referenceBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -33,40 +33,44 @@ task Amber { File referenceFasta File referenceFastaFai File referenceFastaDict + String refGenomeVersion + + Int? tumorOnlyMinDepth Int threads = 2 String memory = "85GiB" String javaXmx = "80G" Int timeMinutes = 480 - String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_1" } command { AMBER -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -reference_bam ~{referenceBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ -threads ~{threads} \ -ref_genome ~{referenceFasta} \ - -loci ~{loci} + -ref_genome_version ~{refGenomeVersion} \ + -loci ~{loci} \ + ~{"-tumor_only_min_depth " + tumorOnlyMinDepth} } output { File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" - File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" - File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" - File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv.gz" File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" File tumorContaminationVcfIndex =
"~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalHomozygousregionsTsv = "~{outputDir}/~{referenceName}.amber.homozygousregion.tsv" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorContaminationVcf, + tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalHomozygousregionsTsv, normalSnpVcf, normalSnpVcfIndex] } @@ -90,6 +94,7 @@ task Amber { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the reference genome: 37 or 38.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -102,31 +107,40 @@ task Amber { task Cobalt { input { - String referenceName - File referenceBam - File referenceBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex String tumorName File tumorBam File tumorBamIndex String outputDir = "./cobalt" File gcProfile + File refGenomeFile + + File? tumorOnlyDiploidBed + File? targetRegionsNormalisationTsv + Int? 
pcfGamma Int threads = 1 String memory = "5GiB" String javaXmx = "4G" - Int timeMinutes = 480 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" + Int timeMinutes = 960 + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_1" } command { COBALT -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -reference_bam ~{referenceBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ -threads ~{threads} \ - -gc_profile ~{gcProfile} + -gc_profile ~{gcProfile} \ + -ref_genome ~{refGenomeFile} \ + ~{"-tumor_only_diploid_bed " + tumorOnlyDiploidBed} \ + ~{"-target_region " + targetRegionsNormalisationTsv} \ + ~{"-pcf_gamma " + pcfGamma} } output { @@ -136,10 +150,9 @@ task Cobalt { File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" - File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" - File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv.gz" Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, - normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv] } runtime { @@ -158,6 +171,7 @@ task Cobalt { tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + refGenomeFile: {description: "The reference genome fasta file.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory
this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -176,7 +190,7 @@ task CupGenerateReport { String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } # This script writes to the directory that the input is located in. @@ -232,34 +246,29 @@ task Cuppa { input { Array[File]+ linxOutput Array[File]+ purpleOutput + File virusInterpreterOutput String sampleName Array[String]+ categories = ["DNA"] Array[File]+ referenceData - File purpleSvVcf - File purpleSvVcfIndex - File purpleSomaticVcf - File purpleSomaticVcfIndex String outputDir = "./cuppa" String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } command { set -e mkdir -p sampleData ~{outputDir} ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + ln -s -t sampleData ~{virusInterpreterOutput} cuppa -Xmx~{javaXmx} \ -output_dir ~{outputDir} \ - -output_id ~{sampleName} \ -categories '~{sep="," categories}' \ -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ -sample_data_dir sampleData \ - -sample_data ~{sampleName} \ - -sample_sv_file ~{purpleSvVcf} \ - -sample_somatic_vcf ~{purpleSomaticVcf} + -sample_data ~{sampleName} } output { @@ -278,10 +287,6 @@ task Cuppa { sampleName: {description: "The name of the sample.", category: "required"} categories: {description: "The classifiers to use.", category: "advanced"} referenceData : {description: "The reference data.", category: "required"} - purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} - purpleSvVcfIndex: 
{description: "The index of the structural variants VCF file produced by purple.", category: "required"} - purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} - purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} outputDir: {description: "The directory the ouput will be placed in.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -300,7 +305,7 @@ task CuppaChart { String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } command { @@ -342,16 +347,24 @@ task Gripss { File knownFusionPairBedpe File breakendPon File breakpointPon - String referenceName - String tumorName + File repeatMaskFile + String? referenceName + String sampleName File vcf File vcfIndex + String outputId String outputDir = "./" + Boolean hg38 = false + Int? hardMinTumorQual + Int? minQualBreakPoint + Int? 
minQualBreakEnd + Boolean filterSgls = false + Boolean germline = false String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.3.2--hdfd78af_0" } command { @@ -359,21 +372,30 @@ task Gripss { mkdir -p ~{outputDir} gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ -known_hotspot_file ~{knownFusionPairBedpe} \ -pon_sgl_file ~{breakendPon} \ -pon_sv_file ~{breakpointPon} \ - -reference ~{referenceName} \ - -sample ~{tumorName} \ + -repeat_mask_file ~{repeatMaskFile} \ + ~{"-reference " + referenceName} \ + -sample ~{sampleName} \ -vcf ~{vcf} \ -output_dir ~{outputDir} \ - -output_id somatic + -output_id ~{outputId} \ + ~{if filterSgls then "-filter_sgls" else ""} \ + ~{"-hard_min_tumor_qual " + hardMinTumorQual} \ + ~{"-min_qual_break_point " + minQualBreakPoint} \ + ~{"-min_qual_break_end " + minQualBreakEnd} \ + ~{if germline then "-germline" else ""} } + String suffix = if defined(referenceName) then "somatic" else "germline" + output { - File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" - File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" + File fullVcf = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz.tbi" } runtime { @@ -390,7 +412,7 @@ task Gripss { knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} breakendPon: 
{description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} - tumorName: {description: "The name of the tumor sample.", category: "required"} + sampleName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the normal sample.", category: "required"} vcf: {description: "The input VCF.", category: "required"} vcfIndex: {description: "The index for the input VCF.", category: "required"} @@ -405,116 +427,6 @@ task Gripss { } } -task GripssApplicationKt { - # Obsolete - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - String tumorName - String referenceName - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "32GiB" - String javaXmx = "31G" - Int timeMinutes = 45 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -tumor ~{tumorName} \ - -reference ~{referenceName} \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} \ - -paired_normal_tumor_ordinals - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceName: {description: "The name of the normal sample.", category: "required"} - 
tumorName: {description: "The name of the tumor sample.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task GripssHardFilterApplicationKt { - # Obsolete - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "3GiB" - String javaXmx = "2G" - Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - task HealthChecker { input { String outputDir = "." @@ -529,7 +441,7 @@ task HealthChecker { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 1 - String dockerImage = "quay.io/biowdl/health-checker:3.2" + String dockerImage = "quay.io/biowdl/health-checker:3.4" } command { @@ -585,19 +497,84 @@ task HealthChecker { } } +task Lilac { + input { + String sampleName + File referenceBam + File referenceBamIndex + File? 
tumorBam + File? tumorBamIndex + String refGenomeVersion + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File? geneCopyNumberFile + File? somaticVariantsFile + File? somaticVariantsFileIndex + String outputDir = "./lilac" + + #The following need to be in the same directory + File hlaRefAminoacidSequencesCsv + File hlaRefNucleotideSequencesCsv + File lilacAlleleFrequenciesCsv + + String javaXmx = "15G" + String memory = "16GiB" + Int timeMinutes = 1440 #FIXME + Int threads = 1 + String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.4.2--hdfd78af_0" + } + + command { + LILAC -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -reference_bam ~{referenceBam} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \ + -output_dir ~{outputDir} \ + -threads ~{threads} \ + ~{"-tumor_bam " + tumorBam} \ + ~{"-gene_copy_number " + geneCopyNumberFile} \ + ~{"-somatic_vcf " + somaticVariantsFile} + } + + output { + File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv" + File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv" + File candidatesCoverageCsv = "~{outputDir}/~{sampleName}.candidates.coverage.csv" + } + + runtime { + memory: memory + cpu: threads + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + + } +} + + task Linx { input { String sampleName File svVcf File svVcfIndex - Array[File]+ purpleOutput + Array[File] purpleOutput = [] String refGenomeVersion String outputDir = "./linx" - File fragileSiteCsv + File? fragileSiteCsv File lineElementCsv - File knownFusionCsv + File? knownFusionCsv File driverGenePanel Boolean writeAllVisFusions = false + Boolean germline = false + Boolean checkFusions = true + Boolean checkDrivers = true + Boolean writeVisData = true #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -607,47 +584,56 @@ task Linx { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0" + + String? DONOTDEFINE } + String? purpleDir = if length(purpleOutput) > 0 + then sub(purpleOutput[0], basename(purpleOutput[0]), "") + else DONOTDEFINE + command { linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + ~{"-purple_dir " + purpleDir} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ - -fragile_site_file ~{fragileSiteCsv} \ + ~{"-fragile_site_file " + fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ - -check_fusions \ - -known_fusion_file ~{knownFusionCsv} \ - -check_drivers \ + ~{if checkFusions then "-check_fusions" else ""} \ + ~{"-known_fusion_file " + knownFusionCsv} \ + ~{if checkDrivers then "-check_drivers" else ""} \ -driver_gene_panel ~{driverGenePanel} \ - -chaining_sv_limit 0 \ - -write_vis_data \ - ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} + ~{if writeVisData then "-write_vis_data" else ""} \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \ + ~{if germline then "-germline" else ""} } + String prefix = if germline then "~{sampleName}.linx.germline" else "~{sampleName}.linx" + output { - File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" - File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" - File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" - File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" - File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" - File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" - File linxSvs = 
"~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" - File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" - File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" - File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" - File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" - File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File driverCatalog = "~{outputDir}/~{prefix}.driver.catalog.tsv" + File linxClusters = "~{outputDir}/~{prefix}.clusters.tsv" + File linxLinks = "~{outputDir}/~{prefix}.links.tsv" + File linxSvs = "~{outputDir}/~{prefix}.svs.tsv" + File? linxBreakend = "~{outputDir}/~{prefix}.breakend.tsv" + File? linxDrivers = "~{outputDir}/~{prefix}.drivers.tsv" + File? linxFusion = "~{outputDir}/~{prefix}.fusion.tsv" + File? linxVisCopyNumber = "~{outputDir}/~{prefix}.vis_copy_number.tsv" + File? linxVisFusion = "~{outputDir}/~{prefix}.vis_fusion.tsv" + File? linxVisGeneExon = "~{outputDir}/~{prefix}.vis_gene_exon.tsv" + File? linxVisProteinDomain = "~{outputDir}/~{prefix}.vis_protein_domain.tsv" + File? linxVisSegments = "~{outputDir}/~{prefix}.vis_segments.tsv" + File? linxVisSvData = "~{outputDir}/~{prefix}.vis_sv_data.tsv" + File? 
linxDisruptionTsv = "~{outputDir}/~{prefix}.disruption.tsv" File linxVersion = "~{outputDir}/linx.version" - Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, - linxVersion] + linxDisruptionTsv, linxVersion]) } runtime { @@ -684,7 +670,7 @@ task Linx { task LinxVisualisations { input { - String outputDir = "./linx_visualisation" + String outputDir = "./linx" String sample String refGenomeVersion Array[File]+ linxOutput @@ -693,14 +679,14 @@ task LinxVisualisations { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0" } command { set -e mkdir -p ~{outputDir} java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + -cp /usr/local/share/hmftools-linx-1.22.1-0/linx.jar \ com.hartwig.hmftools.linx.visualiser.SvVisualiser \ -sample ~{sample} \ -ref_genome_version ~{refGenomeVersion} \ @@ -754,19 +740,25 @@ task Orange { File sageSomaticTumorSampleBqrPlot File purpleGeneCopyNumberTsv File purpleGermlineDriverCatalogTsv + File purpleGermlineDeletionTsv File purpleGermlineVariantVcf File purpleGermlineVariantVcfIndex Array[File]+ purplePlots File purplePurityTsv File purpleQcFile + File purpleSomaticCopyNumberFile File purpleSomaticDriverCatalogTsv File purpleSomaticVariantVcf File purpleSomaticVariantVcfIndex + File lilacQcCsv + File lilacResultCsv File linxFusionTsv File linxBreakendTsv File linxDriverCatalogTsv File linxDriverTsv + File linxGermlineDisruptionTsv Array[File]+ linxPlots + File linxStructuralVariantTsv File cuppaResultCsv File cuppaSummaryPlot File? 
cuppaFeaturePlot @@ -777,18 +769,23 @@ task Orange { #File pipelineVersionFile File cohortMappingTsv File cohortPercentilesTsv + Boolean hg38 = false + File driverGenePanel + File knownFusionFile String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biocontainers/hmftools-orange:1.10.2--hdfd78af_0" } command { set -e mkdir -p ~{outputDir} + export JAVA_TOOL_OPTIONS='--add-opens=java.base/java.time=ALL-UNNAMED' orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -output_dir ~{outputDir} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ -doid_json ~{doidJson} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -max_evidence_level C \ @@ -803,17 +800,23 @@ task Orange { -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_deletion_tsv ~{purpleGermlineDeletionTsv} \ -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ -purple_purity_tsv ~{purplePurityTsv} \ -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_copy_number_tsv ~{purpleSomaticCopyNumberFile} \ -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -lilac_qc_csv ~{lilacQcCsv} \ + -lilac_result_csv ~{lilacResultCsv} \ -linx_fusion_tsv ~{linxFusionTsv} \ -linx_breakend_tsv ~{linxBreakendTsv} \ -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ -linx_driver_tsv ~{linxDriverTsv} \ + -linx_germline_disruption_tsv ~{linxGermlineDisruptionTsv} \ -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -linx_structural_variant_tsv ~{linxStructuralVariantTsv} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ ~{"-cuppa_feature_plot " + cuppaFeaturePlot} 
\ @@ -822,9 +825,10 @@ task Orange { -protect_evidence_tsv ~{protectEvidenceTsv} \ -annotated_virus_tsv ~{annotatedVirusTsv} \ -cohort_mapping_tsv ~{cohortMappingTsv} \ - -cohort_percentiles_tsv ~{cohortPercentilesTsv} + -cohort_percentiles_tsv ~{cohortPercentilesTsv} \ + -driver_gene_panel_tsv ~{driverGenePanel} \ + -known_fusion_file ~{knownFusionFile} } - #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} output { File orangeJson = "~{outputDir}/~{tumorName}.orange.json" @@ -894,18 +898,37 @@ task Pave { File referenceFastaDict String refGenomeVersion File driverGenePanel + File mappabilityBed + Array[File] gnomadFreqFiles = [] + #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv File transExonDataCsv File transSpliceDataCsv + File? ponFile + File? ponArtefactFile + String? ponFilters + File? clinvarVcf + File? clinvarVcfIndex + File? blacklistVcf + File? blacklistBed + File? blacklistVcfIndex + Boolean writePassOnly = false + Int timeMinutes = 50 String javaXmx = "8G" String memory = "9GiB" - String dockerImage = "quay.io/biowdl/pave:v1.0" + String dockerImage = "quay.io/biocontainers/hmftools-pave:1.4.1--hdfd78af_0" + + String? DONOTDEFINE } + String? 
gnomadFreqDir = if length(gnomadFreqFiles) > 0 + then sub(gnomadFreqFiles[0], basename(gnomadFreqFiles[0]), "") + else DONOTDEFINE + command { set -e mkdir -p ~{outputDir} @@ -916,7 +939,18 @@ task Pave { -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ - -driver_gene_panel ~{driverGenePanel} + -driver_gene_panel ~{driverGenePanel} \ + -read_pass_only \ + -mappability_bed ~{mappabilityBed} \ + ~{"-pon_file " + ponFile} \ + ~{"-pon_artefact_file " + ponArtefactFile} \ + ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \ + ~{"-gnomad_freq_dir " + gnomadFreqDir} \ + ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \ + ~{"-clinvar_vcf " + clinvarVcf} \ + ~{"-blacklist_bed " + blacklistBed} \ + ~{"-blacklist_vcf " + blacklistVcf} \ + ~{if writePassOnly then "-write_pass_only" else ""} } output { @@ -945,7 +979,16 @@ task Pave { proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} - + mappabilityBed: {description: "A bed file with mappability information.", category: "required"} + ponFile: {description: "A panel of normals files.", category: "common"} + ponArtefactFile: {description: "A panel of normals artefact file.", category: "common"} + ponFilters: {description: "Filters to be applied based on the panel of normals.", category: "common"} + gnomadFreqFiles: {description: "A 
directory with gnomad frequency information.", category: "common"} + clinvarVcf: {description: "A clinvar VCF file.", category: "common"} + clinvarVcfIndex: {description: "The index for the clinvar VCF file.", category: "common"} + blacklistVcf: {description: "A blacklist VCF file.", category: "common"} + blacklistBed: {description: "A blacklist bed file.", category: "common"} + blacklistVcfIndex: {description: "The index for the blacklist vcf file.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -961,7 +1004,7 @@ task Protect { String tumorName String referenceName Array[String]+ sampleDoids - String outputDir = "." + String outputDir = "./protect" Array[File]+ serveActionability File doidJson File purplePurity @@ -978,21 +1021,25 @@ task Protect { File linxDriversCatalog File chordPrediction File annotatedVirus + File lilacResultCsv + File lilacQcCsv + File driverGeneTsv String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v2.0" + String dockerImage = "quay.io/biocontainers/hmftools-protect:2.3--hdfd78af_0" } command { - protect -Xmx~{javaXmx} \ + protect -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -driver_gene_tsv ~{driverGeneTsv} \ -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ @@ -1005,7 +1052,9 @@ task Protect { -linx_breakend_tsv ~{linxBreakend} \ -linx_driver_catalog_tsv ~{linxDriversCatalog} \ -chord_prediction_txt ~{chordPrediction} \ - -annotated_virus_tsv 
~{annotatedVirus} + -annotated_virus_tsv ~{annotatedVirus} \ + -lilac_result_csv ~{lilacResultCsv} \ + -lilac_qc_csv ~{lilacQcCsv} } output { @@ -1052,14 +1101,14 @@ task Protect { task Purple { input { - String referenceName + String? referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput Array[File]+ cobaltOutput File gcProfile File somaticVcf - File germlineVcf + File? germlineVcf File filteredSvVcf File filteredSvVcfIndex File fullSvVcf @@ -1067,11 +1116,18 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + String refGenomeVersion File driverGenePanel File somaticHotspots - File germlineHotspots + File? germlineHotspots + File? germlineDelFreqFile Float? highlyDiploidPercentage Float? somaticMinPuritySpread + File? targetRegionsBed + File? targetRegionsRatios + File? targetRegionsMsiIndels + Int? minDiploidTumorRatioCount + Int? minDiploidTumorRatioCountCentromere #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -1083,14 +1139,16 @@ task Purple { String memory = "9GiB" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" + #String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" FIXME see if biocontainer works + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.7.1--hdfd78af_0" } command { - PURPLE -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -germline_vcf ~{germlineVcf} \ - -germline_hotspots ~{germlineHotspots} \ + PURPLE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + ~{"-reference " + referenceName} \ + ~{"-germline_vcf " + germlineVcf} \ + ~{"-germline_hotspots " + germlineHotspots} \ + ~{"-germline_del_freq_file " + germlineDelFreqFile} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -1101,12 +1159,18 @@ 
task Purple { -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ -driver_gene_panel ~{driverGenePanel} \ ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ + ~{"-target_regions_bed " + targetRegionsBed} \ + ~{"-target_regions_ratios " + targetRegionsRatios} \ + ~{"-target_regions_msi_indels " + targetRegionsMsiIndels} \ + ~{"-min_diploid_tumor_ratio_count " + minDiploidTumorRatioCount} \ + ~{"-min_diploid_tumor_ratio_count_centromere " + minDiploidTumorRatioCountCentromere} \ -threads ~{threads} } @@ -1153,7 +1217,8 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv, + purpleGermlineDeletionTsv] Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, @@ -1204,72 +1269,163 @@ task Purple { } } -task Sage { +task Rose { input { + File actionabilityDatabaseTsv + Boolean hg38 = false + File driverGeneTsv + File purplePurityTsv + File purpleQc + File purpleGeneCopyNumberTsv + File purpleSomaticDriverCatalogTsv + File purpleGermlineDriverCatalogTsv + File purpleSomaticVcf + File purpleSomaticVcfIndex + File purpleGermlineVcf + File purpleGermlineVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File annotatedVirusTsv + File 
chordPredictionTxt + File cuppaResultCsv + String outputDir = "./rose" String tumorName - File tumorBam - File tumorBamIndex + String referenceName + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-rose:1.3--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + rose -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -actionability_database_tsv ~{actionabilityDatabaseTsv} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ + -driver_gene_tsv ~{driverGeneTsv} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQc} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVcf} \ + -purple_germline_variant_vcf ~{purpleGermlineVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -output_dir ~{outputDir} \ + -tumor_sample_id ~{tumorName} \ + -ref_sample_id ~{referenceName} \ + -patient_id not_used_because_primary_tumor_tsv_has_only_headers + } + + output { + File roseTsv = "~{outputDir}/~{tumorName}.rose.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + + } +} + +task Sage { + input { + Array[String]+ tumorName + Array[File]+ tumorBam + Array[File]+ tumorBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai File hotspots File panelBed File highConfidenceBed + File coverageBed Boolean hg38 = false Boolean panelOnly = false String outputPath = "./sage.vcf.gz" + #The following should be in the same directory. 
+ File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv - String? referenceName - File? referenceBam - File? referenceBamIndex + Array[String] referenceName = [] + Array[File] referenceBam = [] + Array[File] referenceBamIndex = [] Int? hotspotMinTumorQual Int? panelMinTumorQual Int? hotspotMaxGermlineVaf Int? hotspotMaxGermlineRelRawBaseQual Int? panelMaxGermlineVaf Int? panelMaxGermlineRelRawBaseQual - String? mnvFilterEnabled - File? coverageBed + Int? refSampleCount + Float? hotspotMinTumorVaf + Int? highConfidenceMinTumorQual + Int? lowConfidenceMinTumorQual Int threads = 32 String javaXmx = "16G" String memory = "20GiB" Int timeMinutes = 720 - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0" } command { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + referenceName} \ - ~{"-reference_bam " + referenceBam} \ - -ref_genome ~{referenceFasta} \ + -tumor ~{sep="," tumorName} \ + -tumor_bam ~{sep="," tumorBam} \ + ~{if length(referenceName) > 0 then "-reference" else ""} ~{sep="," referenceName} \ + ~{if length(referenceBam) > 0 then "-reference_bam" else ""} ~{sep="," referenceBam} \ -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + -high_confidence_bed ~{highConfidenceBed} \ + -panel_bed ~{panelBed} \ + -coverage_bed ~{coverageBed} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{true="38" false="37" hg38} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -write_bqr_data \ + -write_bqr_plot \ + -out ~{outputPath} \ + -threads ~{threads} \ ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ 
~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ - ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ - -threads ~{threads} \ - -out ~{outputPath} + ~{"-ref_sample_count " + refSampleCount} \ + ~{"-hotspot_min_tumor_vaf " + hotspotMinTumorVaf} \ + ~{"-high_confidence_min_tumor_qual " + highConfidenceMinTumorQual} \ + ~{"-low_confidence_min_tumor_qual " + lowConfidenceMinTumorQual} } - output { + String outputDir = sub(outputPath, basename(outputPath), "") + + output { #FIXME does it produce multiple plots/tsvs if multiple samples are given? File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" - File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" - File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" - File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" - File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" + File? referenceSageBqrPng = "~{outputDir}/~{if length(referenceName) > 0 then referenceName[0] else 'NO_REFERENCE'}.sage.bqr.png" + File? 
referenceSageBqrTsv = "~{outputDir}/~{if length(referenceName) > 0 then referenceName[0] else 'NO_REFERENCE'}.sage.bqr.tsv" + File tumorSageBqrPng = "~{outputDir}/~{tumorName[0]}.sage.bqr.png" + File tumorSageBqrTsv = "~{outputDir}/~{tumorName[0]}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{outputDir}/~{tumorName[0]}.sage.gene.coverage.tsv" + File referenceSageExonMediansTsv = "~{outputDir}/~{tumorName[0]}.sage.exon.medians.tsv" + Array[File] outputs = select_all([outputVcf, outputVcfIndex, referenceSageBqrPng, + referenceSageBqrTsv, tumorSageBqrPng, tumorSageBqrTsv, + sageGeneCoverageTsv, referenceSageExonMediansTsv]) } runtime { @@ -1299,8 +1455,193 @@ task Sage { hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} - mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} + refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"} + hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sigs { + input { + String sampleName + File signaturesFile + File somaticVcfFile + File somaticVcfIndex + String outputDir = "./sigs" + + String javaXmx = "4G" + String memory = "5GiB" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-sigs:1.1--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + sigs -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -signatures_file ~{signaturesFile} \ + -somatic_vcf_file ~{somaticVcfFile} \ + -output_dir ~{outputDir} + } + + output { + File sigAllocationTsv = "~{outputDir}/~{sampleName}.sig.allocation.tsv" + File sigSnvCountsCsv = "~{outputDir}/~{sampleName}.sig.snv_counts.csv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + + } +} + +task SvPrep { + # for ref also add tumorJunctionFile + input { + String sampleName + File bamFile + File bamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File blacklistBed + File knownFusionBed + String outputDir = "." + + File? 
existingJunctionFile + Boolean hg38 = false + + Int threads = 10 + String javaXmx = "48G" + String memory = "50GiB" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_1" + } + + command { + set -e + SvPrep -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -bam_file ~{bamFile} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{true="38" false="37" hg38} \ + -blacklist_bed ~{blacklistBed} \ + -known_fusion_bed ~{knownFusionBed} \ + ~{"-existing_junction_file " + existingJunctionFile} \ + -write_types "JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST" \ + -output_dir ~{outputDir} \ + -threads ~{threads} + samtools sort -O bam ~{outputDir}/~{sampleName}.sv_prep.bam -o ~{outputDir}/~{sampleName}.sv_prep.sorted.bam + samtools index ~{outputDir}/~{sampleName}.sv_prep.sorted.bam + } + + output { + File preppedBam = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam" + File preppedBamIndex = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam.bai" + File junctions = "~{outputDir}/~{sampleName}.sv_prep.junctions.csv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + bamFile: {description: "The BAM file to prepare for SV calling with GRIDSS.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + blacklistBed: {description: "Blacklist bed file.", category: "required"} + knownFusionBed: {description: "Bed file with known fusion sites", category: "required"} + outputDir: {description: "Path to the 
output directory.", category: "common"} + existingJunctionFile: {description: "Junctions file generated by an earlier run of this tool, e.g. from a paired sample.", category: "common"} + hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task SvPrepDepthAnnotator { + input { + File inputVcf + File inputVcfIndex + Array[File]+ bamFiles + Array[File]+ bamIndexes + Array[String]+ samples + File referenceFasta + File referenceFastaDict + File referenceFastaFai + Boolean hg38 = false + String outputVcf = "gridss.depth_annotated.vcf.gz" + + Int threads = 10 + String javaXmx = "48G" + String memory = "50GiB" + Int timeMinutes = 240 + String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputVcf})" + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sv-prep-1.1-0/sv-prep.jar \ + com.hartwig.hmftools.svprep.depth.DepthAnnotator \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputVcf} \ + -samples ~{sep="," samples} \ + -bam_files ~{sep="," bamFiles} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ + -threads ~{threads} + } + + output { + File vcf = outputVcf + File vcfIndex = outputVcf + ".tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey 
+ cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + samples: {description: "The names of the samples.", category: "required"} + bamFiles: {description: "The BAM files.", category: "required"} + bamIndexes: {description: "The indexes for the BAM files.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"} + outputVcf: {description: "The path for the output VCF.", category: "common"} + + threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -1328,6 +1669,8 @@ task VirusInterpreter { } command { + set -e + mkdir -p ~{outputDir} virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ -purple_purity_tsv ~{purplePurityTsv} \ diff --git a/peach.wdl b/peach.wdl index 7da029d0..9ace8958 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,7 +30,7 @@ task Peach { File panelJson String memory = "2GiB" - String dockerImage = "quay.io/biowdl/peach:v1.5" + String dockerImage = "quay.io/biowdl/peach@sha256:025dc28fe448256729a6022d4d30deaee8105ab83d123dab9640251985240748" Int timeMinutes = 5 } @@ -41,7 +41,7 @@ task Peach { --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ - --tool_version 1.5 \ + --tool_version 1.7 \ --outputdir ~{outputDir} \ --panel ~{panelJson} } diff --git a/picard.wdl b/picard.wdl index 6628cf0e..314e0a7b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -510,6 +510,7 @@ task CollectWgsMetrics { Int? minimumMappingQuality Int? minimumBaseQuality Int? coverageCap + File? intervals String memory = "5GiB" String javaXmx = "4G" @@ -528,7 +529,8 @@ task CollectWgsMetrics { OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"COVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} \ + ~{"INTERVALS=" + intervals} } output { diff --git a/sambamba.wdl b/sambamba.wdl index be347f94..acf40278 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 35) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -139,6 +139,53 @@ task Markdup { } } +task Slice { + input { + File bamFile + File bamIndex + String outputPath = "./sliced.bam" + File regions + + String memory = "8G" + Int timeMinutes = 720 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + sambamba slice \ + -L ~{regions} \ + -o ~{outputPath} \ + ~{bamFile} + sambamba index ~{outputPath} + } + + output { + File slicedBam = outputPath + File slicedBamIndex = "~{outputPath}.bai" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + bamFile: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The index for the input BAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + regions: {description: "The regions to be sliced.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Sort { input { File inputBam diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..875b9ddb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -566,7 +566,9 @@ task View { File inFile String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + Boolean useIndex = false + File? inFileIndex File? referenceFasta Int? includeFilter Int? 
excludeFilter @@ -589,7 +591,8 @@ task View { samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ + ~{if uncompressedBamOutput then "-u" else ""} \ + ~{if useIndex then "-M" else ""} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -617,6 +620,8 @@ task View { inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + useIndex: {description: "Equivalent to samtools view's `-M` flag.", category: "advanced"} + inFileIndex: {description: "An index for the inFile.", category: "common"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"}