From e554f35a07e4f6427e1d8ad1cb7ddcaf3fc50ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Aug 2020 15:38:25 +0200 Subject: [PATCH 001/439] add sage task --- sage.wdl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 sage.wdl diff --git a/sage.wdl b/sage.wdl new file mode 100644 index 00000000..dbc101dc --- /dev/null +++ b/sage.wdl @@ -0,0 +1,92 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sage { + input { + String tumorName + File tumorBam + File tumorBai + String? normalName + File? normalBam + File? normalBai + String assembly + File referenceFasta + File hotspotVcf + File panelBed + File highConfidenceBed + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
+ Int threads = 2 + String javaXmx = "32G" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + SAGE \ + -Xmx~{javaXmx} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -assembly ~{assembly} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspotVcf} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -threads ~{threads} \ + + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} + highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From f335ac9b5d0d061fce172ebd843d76e46e3e1ed1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Aug 2020 12:56:51 +0200 Subject: [PATCH 002/439] adjust sage --- sage.wdl | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/sage.wdl b/sage.wdl index dbc101dc..ba0a6137 100644 --- a/sage.wdl +++ b/sage.wdl @@ -28,32 +28,27 @@ task Sage { String? normalName File? normalBam File? normalBai - String assembly File referenceFasta - File hotspotVcf - File panelBed - File highConfidenceBed + File referenceFastaDict + File referenceFastaFai + File knownHotspots + File codingRegsions Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- Int threads = 2 String javaXmx = "32G" + String memory = "33G" String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" } command { - SAGE \ - -Xmx~{javaXmx} \ + SAGE -Xmx~{javaXmx} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ - -assembly ~{assembly} \ -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspotVcf} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -threads ~{threads} \ - + -known_hotspots ~{knownHotspots} \ + -coding_regions ~{codingRegsions} \ -out ~{outputPath} } @@ -74,12 +69,13 @@ task Sage { tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} - hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} - highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} + codingRegsions: {description: "A bed file describing coding regions to search for inframe 
indels.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 2b8e422685de9ea6f63831d8780231a058c1b0cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Sep 2020 15:08:25 +0200 Subject: [PATCH 003/439] add sagev2 --- sage.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..ed3d0866 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Sage { +task SageHotspot { input { String tumorName File tumorBam @@ -33,6 +33,7 @@ task Sage { File referenceFastaFai File knownHotspots File codingRegsions + String outputPath = "./sage_hotspot.vcf.gz" Int timeMinutes = 60 #FIXME I've no idea how long this takes... String javaXmx = "32G" @@ -58,7 +59,6 @@ task Sage { runtime { time_minutes: timeMinutes - cpu: threads docker: dockerImage memory: memory } @@ -77,6 +77,82 @@ task Sage { knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + String? normalName + File? 
normalBam + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + File panelBed + File highConfidenceBed + String assembly = "hg38" + String outputPath = "./sage.vcf.gz" + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + String javaXmx = "32G" + String memory = "33G" + Int threads = 2 + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + java -Xmx~{javaXmx} \ + -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{assembly} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} + 
panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} + highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} + assembly: {description: "The genome assembly used, either \"hg19\" or \"hg38\".", category: "common"} + outputPath: {description: "The path to write the output VCF to.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 1643ff2c165b27ca8cacf66899c30ccad5e0f3b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:29:07 +0200 Subject: [PATCH 004/439] update sage --- sage.wdl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..cdce4680 100644 --- a/sage.wdl +++ b/sage.wdl @@ -31,33 +31,45 @@ task Sage { File referenceFasta File referenceFastaDict File referenceFastaFai - File knownHotspots - File codingRegsions + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
+ Int threads = 2 String javaXmx = "32G" String memory = "33G" - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" } command { - SAGE -Xmx~{javaXmx} \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ -ref_genome ~{referenceFasta} \ - -known_hotspots ~{knownHotspots} \ - -coding_regions ~{codingRegsions} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ -out ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
} runtime { - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads docker: dockerImage memory: memory @@ -74,8 +86,9 @@ task Sage { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} - codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 5f61dd78277dd0d9b408ce866c9e9548b6f152a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:33:31 +0200 Subject: [PATCH 005/439] fix sage... --- sage.wdl | 89 +------------------------------------------------------- 1 file changed, 1 insertion(+), 88 deletions(-) diff --git a/sage.wdl b/sage.wdl index 251630ce..f6e8588b 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task SageHotspot { +task Sage { input { String tumorName File tumorBam @@ -31,17 +31,11 @@ task SageHotspot { File referenceFasta File referenceFastaDict File referenceFastaFai -<<<<<<< HEAD File hotspots File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath -======= - File knownHotspots - File codingRegsions String outputPath = "./sage_hotspot.vcf.gz" ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb Int threads = 2 String javaXmx = "32G" @@ -75,12 +69,8 @@ task SageHotspot { } runtime { -<<<<<<< HEAD time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads -======= - time_minutes: timeMinutes ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb docker: dockerImage memory: memory } @@ -108,80 +98,3 @@ task SageHotspot { category: "advanced"} } } - -task Sage { - input { - String tumorName - File tumorBam - String? normalName - File? normalBam - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File hotspots - File panelBed - File highConfidenceBed - String assembly = "hg38" - String outputPath = "./sage.vcf.gz" - - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- String javaXmx = "32G" - String memory = "33G" - Int threads = 2 - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" - } - - command { - java -Xmx~{javaXmx} \ - -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{assembly} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - } - - runtime { - time_minutes: timeMinutes - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} - highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} - assembly: {description: "The genome assembly used, either 
\"hg19\" or \"hg38\".", category: "common"} - outputPath: {description: "The path to write the output VCF to.", category: "common"} - - threads: {description: "The number of threads to be used.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file From bea730a027a6a3c27675af6e4c85bf72a9aad841 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 10:49:46 +0200 Subject: [PATCH 006/439] change default outputPath of sage --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index f6e8588b..71378bc7 100644 --- a/sage.wdl +++ b/sage.wdl @@ -35,7 +35,7 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath = "./sage_hotspot.vcf.gz" + String outputPath = "./sage.vcf.gz" Int threads = 2 String javaXmx = "32G" From c6d2c3ccc41031e7759655fa274ad0323362b418 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:02:12 +0200 Subject: [PATCH 007/439] change bai to bamIndex in sage --- sage.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index 71378bc7..79458cc1 100644 --- a/sage.wdl +++ b/sage.wdl @@ -24,10 +24,10 @@ task Sage { input { String tumorName File tumorBam - File tumorBai + File tumorBamIndex String? normalName File? normalBam - File? normalBai + File? 
normalBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai From fb14c451e290628e6666181844c47c8716510565 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:14:32 +0200 Subject: [PATCH 008/439] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 79458cc1..7c04aa99 100644 --- a/sage.wdl +++ b/sage.wdl @@ -78,7 +78,7 @@ task Sage { parameter_meta { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} From ddf76915f2fdb19774c782a957c5403f307933a7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 13:42:00 +0200 Subject: [PATCH 009/439] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 7c04aa99..ab42bee8 100644 --- a/sage.wdl +++ b/sage.wdl @@ -81,7 +81,7 @@ task Sage { tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", 
category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From 66852ef0a1f5a08259a0f8eafc01d7a5d2bf1732 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Sep 2020 16:39:10 +0200 Subject: [PATCH 010/439] add snpeff task --- snpeff.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 snpeff.wdl diff --git a/snpeff.wdl b/snpeff.wdl new file mode 100644 index 00000000..e1c0184f --- /dev/null +++ b/snpeff.wdl @@ -0,0 +1,73 @@ +version 1.0 + +task snpEff { + input { + File vcf + File vcfIndex + String genomeVersion + File datadirZip + String outputPath = "./snpeff.vcf" + Boolean hgvs = true + Boolean lof = true + Boolean noDownstream = false + Boolean noIntergenic = false + Boolean noShiftHgvs = false + Int? upDownStreamLen + + String memory = "50G" + String javaXmx = "49G" + Int timeMinutes = 60 #FIXME + String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + unzip ~{datadirZip} + snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + -v \ + ~{genomeVersion} \ + -noDownload \ + -dataDir $PWD/data \ + ~{vcf} \ + ~{true="-hgvs" false="-noHgvs" hgvs} \ + ~{true="-lof" false="-noLof" lof} \ + ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-intergenic" false="" noIntergenic} \ + ~{true="-noShiftHgvs" false="" noShiftHgvs} \ + ~{"-upDownStreamLen " + upDownStreamLen} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to analyse.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + genomeVersion: {description: "The version of the genome to be used. 
The database for this genome must be present in the datadirZip.", category: "required"} + datadirZip: {description: "A zip file containing the directory of databases. This zip file must contain a directory called `data`, with the database mentioned in the genomeVersion input as subdirectory.", + category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} + lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} + noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} + noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} + upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 3ee13418733a762df9883266a73d14426bd26118 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Oct 2020 09:47:52 +0200 Subject: [PATCH 011/439] typo --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index e1c0184f..95383b94 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,6 +1,6 @@ version 1.0 -task snpEff { +task SnpEff { input { File vcf File vcfIndex From 38333745daff01234eb36e178fb97ffb76c87d84 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Oct 2020 14:20:32 +0200 Subject: [PATCH 012/439] fix bcftools filter --- bcftools.wdl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..b1d6e5f0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -125,6 +125,53 @@ task Annotate { } } +task Filter { + input { + File vcf + File vcfIndex + Array[String] include = [] + String outputPath = "./filtered.vcf.gz" + + String memory = "256M" + Int timeMinutes = 1 + ceil(size(vcf, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools \ + filter \ + ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{vcf} \ + -O z \ + -o ~{outputPath} + bctools index --tbi ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + vcf: {description: "The VCF file to operate on.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + include: {description: "Equivalent to the `-i` option.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task Sort { input { File inputFile From 66399ba333105934575da4ff97e43f6e35ef06d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Oct 2020 13:07:13 +0200 Subject: [PATCH 013/439] fix whitespace --- bcftools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b1d6e5f0..619c1733 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -141,11 +141,11 @@ task Filter { set -e mkdir -p "$(dirname ~{outputPath})" bcftools \ - filter \ + filter \ ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ - ~{vcf} \ - -O z \ - -o ~{outputPath} + ~{vcf} \ + -O z \ + -o ~{outputPath} bctools index --tbi ~{outputPath} } From 09372028e140528ccc255b73c87b48ad45a93a77 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 6 Oct 2020 16:20:29 +0200 Subject: [PATCH 014/439] fix bcftools filter --- bcftools.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 619c1733..0be3be93 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,7 +129,9 @@ task Filter { input { File vcf File vcfIndex - Array[String] include = [] + String? include + String? exclude + String? 
softFilter String outputPath = "./filtered.vcf.gz" String memory = "256M" @@ -142,7 +144,9 @@ task Filter { mkdir -p "$(dirname ~{outputPath})" bcftools \ filter \ - ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{"-i " + include} \ + ~{"-e " + exclude} \ + ~{"-s " + softFilter} ~{vcf} \ -O z \ -o ~{outputPath} From 4760d1873df4204bb64c38f6d6c8378c41568b46 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Oct 2020 10:08:58 +0200 Subject: [PATCH 015/439] remove redundant G in -Xmx in snpeff --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 95383b94..079a720a 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From a772e3773feedcb22f7e18f8a1f0130fd9b3cf0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 16 Oct 2020 15:08:33 +0200 Subject: [PATCH 016/439] add gripss, timeMinutes for gridss, fix typos --- bcftools.wdl | 2 +- gridss.wdl | 7 ++- gripss.wdl | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ snpeff.wdl | 2 +- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 gripss.wdl diff --git a/bcftools.wdl b/bcftools.wdl index 0be3be93..e68e527c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -150,7 +150,7 @@ task Filter { ~{vcf} \ -O z \ -o ~{outputPath} - bctools index --tbi ~{outputPath} + bcftools index --tbi ~{outputPath} } output { diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..7516553d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ 
-34,7 +34,8 @@ task GRIDSS { String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 - Int threads = 1 + Int threads = 2 + Int timeMinutes = ceil(1440 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -64,6 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" + time_minutes: timeMinutes docker: dockerImage } @@ -79,6 +81,7 @@ task GRIDSS { threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file diff --git a/gripss.wdl b/gripss.wdl new file mode 100644 index 00000000..6ed0bcf9 --- /dev/null +++ b/gripss.wdl @@ -0,0 +1,117 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "advanced"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task HardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file diff --git a/snpeff.wdl b/snpeff.wdl index 079a720a..d639a036 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 6581d965977ab6a4f31058065bca84fc4106ed9f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 14:05:48 +0200 Subject: [PATCH 017/439] add AnnotateInsertedSequence task to gridss.wdl --- gridss.wdl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 7516553d..78e4bd40 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -84,4 +84,60 @@ task GRIDSS { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } +} + +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + } + + command { + java -Xmx~{javaXmx} \ + -Dsamjdk.create_index=true \ + -Dsamjdk.use_async_io_read_samtools=true \ + -Dsamjdk.use_async_io_write_samtools=true \ + -Dsamjdk.use_async_io_write_tribble=true \ + -Dsamjdk.buffer_size=4194304 \ + -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ + gridss.AnnotateInsertedSequence \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From cd64c02f84707a26ed6787e83269347ed6a69ca4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 15:27:17 +0200 Subject: [PATCH 018/439] add some # !UnknownRuntimeKey --- gridss.wdl | 4 ++-- gripss.wdl | 4 ++-- snpeff.wdl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 78e4bd40..89558ff3 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -65,7 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -124,7 +124,7 @@ task AnnotateInsertedSequence { runtime { cpu: threads memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/gripss.wdl b/gripss.wdl index 6ed0bcf9..3f500a60 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -54,7 +54,7 @@ task ApplicationKt { runtime { memory: 
memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -100,7 +100,7 @@ task HardFilterApplicationKt { runtime { memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/snpeff.wdl b/snpeff.wdl index d639a036..a26fadbd 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,7 +45,7 @@ task SnpEff { runtime { docker: dockerImage - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey memory: memory } From 208e8f46530b8a1d0dbdbd3afa22bc7449c03da3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 09:37:14 +0100 Subject: [PATCH 019/439] add some missing inputs to gridss AnnotateInsertedSequence and add missing \ to bcftools Filter --- bcftools.wdl | 2 +- gridss.wdl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index e68e527c..4703580a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -146,7 +146,7 @@ task Filter { filter \ ~{"-i " + include} \ ~{"-e " + exclude} \ - ~{"-s " + softFilter} + ~{"-s " + softFilter} \ ~{vcf} \ -O z \ -o ~{outputPath} diff --git a/gridss.wdl b/gridss.wdl index 89558ff3..cfbb7069 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -91,6 +91,8 @@ task AnnotateInsertedSequence { File inputVcf String outputPath = "gridss.annotated.vcf.gz" File viralReference + File viralReferenceFai + File viralReferenceDict Int threads = 8 String javaXmx = "8G" From 674158b82e2a637c536853113721c48db6e6d09c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 10:51:06 +0100 Subject: [PATCH 020/439] add license notice to snpeff, add index input for bcftools annotate, and BWA mem index image input for gridss annotate inserted sequences --- bcftools.wdl | 2 ++ gridss.wdl | 4 ++++ snpeff.wdl | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 4703580a..d358ab7b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,6 
+44,7 @@ task Annotate { Boolean singleOverlaps = false Array[String] removeAnns = [] File inputFile + File? inputFileIndex String outputPath = "output.vcf.gz" Int threads = 0 @@ -117,6 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gridss.wdl b/gridss.wdl index cfbb7069..c444c854 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,6 +93,7 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict + File viralReferenceImg Int threads = 8 String javaXmx = "8G" @@ -134,6 +135,9 @@ task AnnotateInsertedSequence { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The path the output will be written to.", category: "common"} viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", diff --git a/snpeff.wdl b/snpeff.wdl index a26fadbd..2a113c52 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task SnpEff { input { File vcf From 836f40c11ad03ca513345ba56b6feb502b2724dc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:07:09 +0100 Subject: [PATCH 021/439] fix missing key in parameter_met --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d358ab7b..064e2d6e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8bc34ddf78f998b838bec85e43926b25da42cc66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:12:19 +0100 Subject: [PATCH 022/439] typo --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 064e2d6e..3b512716 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} + inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 070cbb252016d18f59d52e4919a2a267f1c18671 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 13:31:03 +0100 Subject: [PATCH 023/439] add missing input --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 3b512716..1dba7611 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -25,6 +25,7 @@ version 1.0 task Annotate { input { File? annsFile + File? annsFileIndex String? collapse Array[String] columns = [] String? 
exclude @@ -99,7 +100,8 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} + annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From 1e19fbb2a00187bfa10cab023aa52dacb1091e03 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 14:09:13 +0100 Subject: [PATCH 024/439] add missing inputs --- gripss.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gripss.wdl b/gripss.wdl index 3f500a60..c9a8f27d 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -25,6 +25,8 @@ task ApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" File referenceFasta + File referenceFastaFai + File referenceFastaDict File breakpointHotspot File breakendPon File breakpointPon @@ -61,7 +63,10 @@ task ApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "advanced"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} From d6109250b32299638c1d0f47edf580a69b0732b4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 16:36:10 +0100 Subject: [PATCH 025/439] add some cleanup to snpeff --- snpeff.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/snpeff.wdl b/snpeff.wdl index 2a113c52..85709079 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -59,6 +59,7 @@ task SnpEff { ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ > ~{outputPath} + rm -r $PWD/data } output { From 4a2f3366cb5f0cd57bfab8da01369c29c6a35063 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Nov 2020 16:19:31 +0100 Subject: [PATCH 026/439] add tasks for amber and cobalt, group tasks from hmftools in one file --- gripss.wdl | 122 ----------------------------------------------------- sage.wdl | 100 ------------------------------------------- 2 files changed, 222 deletions(-) delete mode 100644 gripss.wdl delete mode 100644 sage.wdl diff --git a/gripss.wdl b/gripss.wdl deleted file mode 100644 index c9a8f27d..00000000 --- a/gripss.wdl +++ /dev/null @@ -1,122 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to 
whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task ApplicationKt { - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with 
the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task HardFilterApplicationKt { - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The 
maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file diff --git a/sage.wdl b/sage.wdl deleted file mode 100644 index ab42bee8..00000000 --- a/sage.wdl +++ /dev/null @@ -1,100 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task Sage { - input { - String tumorName - File tumorBam - File tumorBamIndex - String? normalName - File? normalBam - File? 
normalBamIndex - File referenceFasta - File referenceFastaDict - File referenceFastaFai - File hotspots - File panelBed - File highConfidenceBed - Boolean hg38 = false - String outputPath = "./sage.vcf.gz" - - Int threads = 2 - String javaXmx = "32G" - String memory = "33G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... 
- } - - runtime { - time_minutes: timeMinutes # !UnknownRuntimeKey - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} - highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} From 9b8d8a9844ea41ad4f1f630ed6b816be5596f8c9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:04:28 +0100 Subject: [PATCH 027/439] add hmftools.wdl --- hmftools.wdl | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 hmftools.wdl diff --git a/hmftools.wdl b/hmftools.wdl new file mode 100644 index 00000000..73c3e318 --- /dev/null +++ b/hmftools.wdl @@ -0,0 +1,433 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Amber { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./amber" + File loci + File referenceFasta + File referenceFastaFai + File referenceFastaDict + + Int threads = 2 + String memory = "33G" + String javaXmx = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + } + + command { + AMBER -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir} \ + -threads ~{threads} \ + -ref_genome ~{referenceFasta} \ + -loci ~{loci} + } + + output { + File version = "amber.version" + File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" + File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" + File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" + File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" + File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" + File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + normalSnpVcf, normalSnpVcfIndex] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex:
{description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Cobalt { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./cobalt" + File gcProfile + + Int threads = 1 + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + } + + command { + COBALT -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir}\ + -threads ~{threads} \ + -gc_profile ~{gcProfile} + } + + output { + File version = "cobalt.version" + File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" + File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" + File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The
index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be 
written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssHardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Purple { + input { + String normalName + String tumorName + String outputDir = "./purple" + Array[File]+ amberOutput + Array[File]+ cobaltOutput + File gcProfile + File somaticVcf + File filteredSvVcf + File fullSvVcf + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + + Int threads = 1 + Int time_minutes = 60 + String memory = "13G" + String javaXmx = "12G" + String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + } + + command { + PURPLE -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -output_dir ~{outputDir} \ + -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -gc_profile ~{gcProfile} \ + -somatic_vcf ~{somaticVcf} \ + -structural_vcf ~{filteredSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ + -circos /usr/local/bin/circos \ + -ref_genome ~{referenceFasta} \ + -driver_catalog \ + -hotspots ~{hotspots} \ + -threads ~{threads} + + # TODO if shallow also the following: + #-highly_diploid_percentage 0.88 \ + #-somatic_min_total 100 \ + #-somatic_min_purity_spread 0.1 + } + + output { + #TODO + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + amberOutput: {description: "The output files of hmftools amber.", category: "required"} + cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + somaticVcf: {description: "The 
somatic variant calling results.", category: "required"} + filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} + fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + File tumorBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath = "./sage.vcf.gz" + + String? normalName + File? normalBam + File? 
normalBamIndex + + Int threads = 2 + String javaXmx = "32G" + String memory = "33G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
+ } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 90fd344b8f41fb6b1d632a8412ec2b416c5c7715 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:11:19 +0100 Subject: [PATCH 028/439] fix some typos --- hmftools.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 73c3e318..3757cade 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = = "33G" + String memory = "33G" String javaXmx = "32G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = = "9G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" @@ -286,10 +286,10 @@ task Purple { File hotspots Int threads = 1 - Int time_minutes = 60 + Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" } command { @@ -297,8 +297,8 @@ task Purple { -reference ~{normalName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ - -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ - -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ -structural_vcf ~{filteredSvVcf} \ From 764f188c73d8c1b57f0d50b148a30d0e84309c42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:39:52 +0100 Subject: [PATCH 029/439] fix outputs amber/cobalt --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 
3757cade..09af79c9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -54,7 +54,7 @@ task Amber { } output { - File version = "amber.version" + File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" @@ -130,7 +130,7 @@ task Cobalt { } output { - File version = "cobalt.version" + File version = "~{outputDir}/cobalt.version" File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" From 857da21ef4b61276d3beb5ddbe56d0895cd96c32 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:54:06 +0100 Subject: [PATCH 030/439] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 09af79c9..ed2914bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -124,7 +124,7 @@ task Cobalt { -reference_bam ~{normalBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - -output_dir ~{outputDir}\ + -output_dir ~{outputDir} \ -threads ~{threads} \ -gc_profile ~{gcProfile} } From 54ac9d0c41f74c578f2418bc76483d1081695369 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 15:18:41 +0100 Subject: [PATCH 031/439] add missed argument in purple --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index ed2914bf..fc56ecd9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -283,6 +283,7 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + File driverGenePanel File hotspots Int threads = 1 @@ -306,6 +307,7 @@ task Purple { -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ + -driver_gene_panel ~{driverGenePanel} \ -hotspots ~{hotspots} \ -threads ~{threads} @@ -340,6 +342,7 @@ 
task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} From 243c1dbfc834d2e52876e826bf2f852fe51cb2fb Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 26 Nov 2020 09:05:09 +0100 Subject: [PATCH 032/439] enable genotyping --- smoove.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e5c5348f..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -41,11 +41,13 @@ task Call { --outdir ~{outputDir} \ --name ~{sample} \ --fasta ~{referenceFasta} \ + --removepr \ + --genotype \ ~{bamFile} } output { - File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + File smooveVcf = outputDir + "/" + sample + "-smoove.genotyped.vcf.gz" } runtime { From e87052a739ba2d2ac29cf0dad1cb5ace642f6e8c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 9 Dec 2020 13:26:24 +0100 Subject: [PATCH 033/439] add duphold paramater in smoove --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a7e4305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,6 +43,7 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ + --duphold \ ~{bamFile} } From 52b7c02f4ed1e7bee376af192747efa75cf55004 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:04:43 +0100 Subject: [PATCH 034/439] bcftools: rm memory parameter meta --- bcftools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..0cbfdefd 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -43,7 
+43,7 @@ task Annotate { File? regionsFile File? renameChrs File? samplesFile - + Int threads = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -53,7 +53,7 @@ task Annotate { Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { - set -e + set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ @@ -154,7 +154,7 @@ task Sort { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" } - + runtime { memory: memory time_minutes: timeMinutes @@ -291,6 +291,8 @@ task View { File inputFile String outputPath = "output.vcf" + String? exclude + String? include String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +304,8 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{"--include " + include} \ + ~{"--exclude " + exclude} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,7 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4cf91963c64c48478c8009e65aa20678ad423eb9 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:44:10 +0100 Subject: [PATCH 035/439] add duphold --- duphold.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 duphold.sh diff --git a/duphold.sh b/duphold.sh new file mode 100644 index 00000000..6e65ee5c --- /dev/null +++ b/duphold.sh @@ -0,0 +1,76 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" + + command { + set -e + mkdir -p ~{outputDir} + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls of structural variants in VCF file."} + } +} From fb65bfe1ab5e627cb23812264ab651748e844b89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 11:13:40 +0100 Subject: [PATCH 036/439] add duphold.wdl --- duphold.sh => duphold.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename duphold.sh => duphold.wdl (92%) diff --git a/duphold.sh b/duphold.wdl similarity index 92% rename from duphold.sh rename to duphold.wdl index 6e65ee5c..9c7255ff 100644 --- a/duphold.sh +++ b/duphold.wdl @@ -32,7 +32,7 @@ task Duphold { String memory = "15G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" @@ -60,6 +60,7 @@ task Duphold { parameter_meta { # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} @@ -71,6 +72,6 @@ task Duphold { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls of structural variants in VCF file."} + outputVcf: {description: "Duphold annotated VCF file."} } } From fca78c3d28d57b5ebfe802deccc52b86ae00c651 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:23 +0100 Subject: [PATCH 037/439] fix outputpath --- duphold.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/duphold.wdl b/duphold.wdl index 9c7255ff..80fe31d2 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -35,11 +35,9 @@ task Duphold { String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } - String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" - command { set -e - mkdir -p ~{outputDir} + mkdir -p "$(dirname ~{outputPath})" export DUPHOLD_SAMPLE_NAME=~{sample} duphold \ -v ~{inputVcf} \ @@ -66,7 +64,7 @@ task Duphold { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } sample: {description: "The name of the sample.", category: "required"} - outputDir: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 80566da7e582afa0d445547fb3555a8f9cccae07 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:39 +0100 Subject: [PATCH 038/439] remove duphold parameter --- smoove.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 7a7e4305..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,7 +43,6 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ - --duphold \ ~{bamFile} } From 0232cf8e79dc6975eecc9a7d2336f45f2d191f05 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Dec 2020 16:19:20 +0100 Subject: [PATCH 039/439] add some taks --- hmftools.wdl | 47 ++++++++++++++++++++++++++++++++++++++ picard.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index fc56ecd9..f9a606e7 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,6 +269,53 @@ task GripssHardFilterApplicationKt { } } +task HealthChecker { + input { + String normalName + String tumorName + + String javaXmx = "10G" + } + + command { + java -Xmx10G \ + -jar /opt/tools/health-checker/3.1/health-checker.jar \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -metrics_dir ~{metricsPath} \ + -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + # super("health-checker", + # Versions.HEALTH_CHECKER, + # "health-checker.jar", + # "10G", + # Lists.newArrayList("-reference", + # referenceSampleName, + # "-tumor", + # tumorSampleName, + # "-ref_wgs_metrics_file", + # referenceMetricsPath, + # "-tum_wgs_metrics_file", + # tumorMetricsPath, + # "-ref_flagstat_file", + # referenceFlagstatPath, + # "-tum_flagstat_file", + # tumorFlagstatPath, + # "-purple_dir", + # purplePath, + # "-output_dir", + # outputPath)); + + output { + + } + + +} + task 
Purple { input { String normalName diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..88ddd313 100644 --- a/picard.wdl +++ b/picard.wdl @@ -315,6 +315,70 @@ task CollectTargetedPcrMetrics { } } +task CollectWgsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String outputPath = "./wgs_metrics.txt" + + Int? minimumMappingQuality + Int? minimumBaseQuality + Int? coverageCap + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectWgsMetrics \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + INPUT=~{inputBam} \ + OUTPUT=~{outputPath} \ + ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ + ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ + ~{"OVERAGE_CAP=" + coverageCap} + } + + output { + File metrics = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPath: {description: "The path picard CollectWgsMetrics' output should be written to.", category: "common"} + minimumMappingQuality: {description: "Equivalent to picard CollectWgsMetrics' MINIMUM_MAPPING_QUALITY option.", category: "advanced"} + minimumBaseQuality: {description: 
"Equivalent to picard CollectWgsMetrics' MINIMUM_BASE_QUALITY option.", category: "advanced"} + coverageCap: {description: "Equivalent to picard CollectWgsMetrics' OVERAGE_CAP option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From 9896f4fcaba3d5ee9b070a03a21bc23484037fb1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Dec 2020 14:08:56 +0100 Subject: [PATCH 040/439] add purple outputs --- bcftools.wdl | 2 +- bwa.wdl | 2 +- gridss.wdl | 2 +- hmftools.wdl | 39 ++++++++++++++++++++++++++++++++++----- sambamba.wdl | 2 +- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1dba7611..c91460bb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -50,7 +50,7 @@ task Annotate { Int threads = 0 String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 10 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..44cfc9fe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -34,7 +34,7 @@ task Mem { Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 Int? 
memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } diff --git a/gridss.wdl b/gridss.wdl index c444c854..88655442 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,7 +35,7 @@ task GRIDSS { Int jvmHeapSizeGb = 30 Int threads = 2 - Int timeMinutes = ceil(1440 / threads) + 10 + Int timeMinutes = ceil(2880 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } diff --git a/hmftools.wdl b/hmftools.wdl index f9a606e7..86d90332 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "33G" String javaXmx = "32G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" } @@ -312,8 +312,6 @@ task HealthChecker { output { } - - } task Purple { @@ -327,6 +325,7 @@ task Purple { File somaticVcf File filteredSvVcf File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -365,7 +364,37 @@ task Purple { } output { - #TODO + File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" + File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" + File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" + File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" + File purpleQc = 
"~{outputDir}/~{tumorName}.purple.qc" + File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv" + File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv" + File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" + File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" + File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" + File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" + File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" + File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" + File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png" + File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png" + File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" + File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" + File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" + File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File purpleVersion = "~{outputDir}/purple.version" + Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, + purpleVersion] + Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] } runtime { diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..3fc57c65 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -41,7 +41,7 @@ task Markdup { Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = 
"quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") From 48d468d7c97e4b9e3ee892ff49b3fdda4fee9de9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:11:41 +0100 Subject: [PATCH 041/439] add note to HealthChecker --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index 86d90332..760fb63f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -270,6 +270,7 @@ task GripssHardFilterApplicationKt { } task HealthChecker { + # WIP input { String normalName String tumorName From c482e833fa60a8a138b8045dc3f044be0655599c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:31:52 +0100 Subject: [PATCH 042/439] comment out healthchecker task and remove duplicate input in bcftools annotate --- bcftools.wdl | 4 +-- hmftools.wdl | 90 ++++++++++++++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8721540a..14889dff 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,9 +44,7 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - Boolean singleOverlaps = false - + File? 
samplesFile Int threads = 0 String memory = "256M" diff --git a/hmftools.wdl b/hmftools.wdl index 760fb63f..16313fca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,51 @@ task GripssHardFilterApplicationKt { } } -task HealthChecker { - # WIP - input { - String normalName - String tumorName - - String javaXmx = "10G" - } - - command { - java -Xmx10G \ - -jar /opt/tools/health-checker/3.1/health-checker.jar \ - -reference ~{normalName} \ - -tumor ~{tumorName} \ - -metrics_dir ~{metricsPath} \ - -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} - } - - # super("health-checker", - # Versions.HEALTH_CHECKER, - # "health-checker.jar", - # "10G", - # Lists.newArrayList("-reference", - # referenceSampleName, - # "-tumor", - # tumorSampleName, - # "-ref_wgs_metrics_file", - # referenceMetricsPath, - # "-tum_wgs_metrics_file", - # tumorMetricsPath, - # "-ref_flagstat_file", - # referenceFlagstatPath, - # "-tum_flagstat_file", - # tumorFlagstatPath, - # "-purple_dir", - # purplePath, - # "-output_dir", - # outputPath)); - - output { - - } -} +# task HealthChecker { +# # WIP +# input { +# String normalName +# String tumorName +# +# String javaXmx = "10G" +# } +# +# command { +# java -Xmx10G \ +# -jar /opt/tools/health-checker/3.1/health-checker.jar \ +# -reference ~{normalName} \ +# -tumor ~{tumorName} \ +# -metrics_dir ~{metricsPath} \ +# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ +# -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ +# -output_dir ~{outputDir} +# } +# +# # super("health-checker", +# # Versions.HEALTH_CHECKER, +# # "health-checker.jar", +# # "10G", +# # Lists.newArrayList("-reference", +# # referenceSampleName, +# # "-tumor", +# # tumorSampleName, +# # "-ref_wgs_metrics_file", +# # referenceMetricsPath, +# # "-tum_wgs_metrics_file", +# # tumorMetricsPath, +# # "-ref_flagstat_file", +# # 
referenceFlagstatPath, +# # "-tum_flagstat_file", +# # tumorFlagstatPath, +# # "-purple_dir", +# # purplePath, +# # "-output_dir", +# # outputPath)); +# +# output { +# +# } +# } task Purple { input { From c22629ff7ec5c57f113ed79e2fc2784ee915b89f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Jan 2021 15:03:13 +0100 Subject: [PATCH 043/439] add linx task, add more inputs to sage --- hmftools.wdl | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 16313fca..15f54937 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -315,6 +315,110 @@ task GripssHardFilterApplicationKt { # } # } +task Linx { + input { + String sampleName + File svVcf + File svVcfIndex + Array[File]+ purpleOutput + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + String outputDir = "./linx" + File fragileSiteCsv + File lineElementCsv + File replicationOriginsBed + File viralHostsCsv + File knownFusionCsv + File driverGenePanel + #The following should be in the same directory. 
+ File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 30 + String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + } + + command { + linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -sv_vcf ~{svVcf} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -output_dir ~{outputDir} \ + -fragile_site_file ~{fragileSiteCsv} \ + -line_element_file ~{lineElementCsv} \ + -replication_origins_file ~{replicationOriginsBed} \ + -viral_hosts_file ~{viralHostsCsv} \ + -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -check_fusions \ + -known_fusion_file ~{knownFusionCsv} \ + -check_drivers \ + -driver_gene_panel ~{driverGenePanel} \ + -chaining_sv_limit 0 \ + -write_vis_data + } + + output { + File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" + File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" + File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" + File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" + File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" + File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" + File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" + File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" + File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" + File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" + File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" + File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" + File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File linxVersion = 
"~{outputDir}/linx.version" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} + svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} + purpleOutput: {description: "The files produced by PURPLE.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} + lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} + replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} + viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} + knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + 
proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName @@ -419,7 +523,7 @@ task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} @@ -444,11 +548,20 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false + Boolean panelOnly = false String outputPath = "./sage.vcf.gz" String? normalName File? normalBam File? normalBamIndex + Int? hotspotMinTumorQual + Int? panelMinTumorQual + Int? hotspotMaxGermlineVaf + Int? hotspotMaxGermlineRelRawBaseQual + Int? panelMaxGermlineVaf + Int? panelMaxGermlineRelRawBaseQual + String? mnvFilterEnabled + File? 
coverageBed Int threads = 2 String javaXmx = "32G" @@ -470,6 +583,15 @@ task Sage { -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ -assembly ~{true="hg38" false="hg19" hg38} \ + ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ + ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ + ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ + ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ + ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ + ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ + ~{"-coverage_bed " + coverage_bed} \ + ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} } @@ -502,6 +624,13 @@ task Sage { hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"} + panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"} + hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"} + hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} + panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} memory: {description: "The amount of 
memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 111a42bf79d1fb8fa6a34d7b567dc4fc04f67e7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 21 Jan 2021 14:23:53 +0100 Subject: [PATCH 044/439] fix typos --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 15f54937..6de3f777 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,7 +590,7 @@ task Sage { ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverage_bed} \ + ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} From 96fa1bc6ba59825f051c0577d414027fd58f10c4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:38:49 +0100 Subject: [PATCH 045/439] fix some issues, add flagstat --- bcftools.wdl | 1 - hmftools.wdl | 6 +++++- picard.wdl | 4 ++-- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 14889dff..b239320d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -106,7 +106,6 @@ task Annotate { inputFile: {description: "A vcf or bcf file.", category: "required"} inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: 
"Treat as identical records with , see man page for details.", category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 6de3f777..67c49be3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,7 +341,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" } command { @@ -381,6 +381,10 @@ task Linx { File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" + Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, + linxVisFusion, linxVisGeneExon, linxVisProteinDomain, + linxVisSegments, linxVisSvData, linxVersion] } runtime { diff --git a/picard.wdl b/picard.wdl index d52b9cc7..8dc4e0bf 100644 --- a/picard.wdl +++ b/picard.wdl @@ -473,10 +473,10 @@ task CollectWgsMetrics { CollectWgsMetrics \ REFERENCE_SEQUENCE=~{referenceFasta} \ INPUT=~{inputBam} \ - OUTPUT=~{outputPath} \ + OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"OVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} } output { diff --git a/sambamba.wdl b/sambamba.wdl index 0e9a901c..bb63f665 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+task Flagstat { + input { + File inputBam + File inputBamIndex + String outputPath = "./flagstat.txt" + + Int threads = 2 + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + sambamba flagstat \ + -t ~{threads} \ + ~{inputBam} \ + > ~{outputPath} + } + + output { + File stats = outputPath + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The input BAM file.", category: "required"} + inputBamIndex: {description: "The index for the BAM file.", category: "required"} + outputPath: {description: "The path to write the ouput to.", category: "required"} + + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + + task Markdup { input { Array[File] inputBams From 8b51723e40a28d8894015f8b4dad21fcb0cb4bd1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:39:56 +0100 Subject: [PATCH 046/439] add extractSigPredictHRD --- extractSigPredictHRD.wdl | 69 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 extractSigPredictHRD.wdl diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl new file mode 100644 index 00000000..6aa5ff1d --- /dev/null +++ b/extractSigPredictHRD.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ExtractSigPredictHRD { + input { + String outputDir = "." 
+ String sampleName + File snvIndelVcf + File snvIndelVcfIndex + File svVcf + File svVcfIndex + + String memory = "8G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" + } + + command { + extractSigPredictHRD.R \ + ~{outputDir} \ + ~{sampleName} \ + ~{snvIndelVcf} \ + ~{svVcf} \ + } + + output { + File chordPrediction = "~{outputDir}/~{sampleName}_chord_prediction.txt" + File chordSignatures = "~{outputDir}/~{sampleName}_chord_signatures.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The directory the outout will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + snvIndelVcf: {description: "A VCF file with SNVs and indels.", category: "required"} + snvIndelVcfIndex: {description: "The index for the SNV/indel VCF file.", category: "required"} + svVcf: {description: "A VCF file with SVs.", category: "required"} + svVcfIndex: {description: "The index for the SV VCF file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From 558c8088dee1d252fb668303874684fd62741409 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 4 Feb 2021 15:38:11 +0100 Subject: [PATCH 047/439] add health-checker --- hmftools.wdl | 106 +++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 67c49be3..5bad1dbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,67 @@ task GripssHardFilterApplicationKt { } } -# task HealthChecker { -# # WIP -# input { -# String normalName -# String tumorName -# -# String javaXmx = "10G" -# } -# -# command { -# java -Xmx10G \ -# -jar /opt/tools/health-checker/3.1/health-checker.jar \ -# -reference ~{normalName} \ -# -tumor ~{tumorName} \ -# -metrics_dir ~{metricsPath} \ -# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -# -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -# -output_dir ~{outputDir} -# } -# -# # super("health-checker", -# # Versions.HEALTH_CHECKER, -# # "health-checker.jar", -# # "10G", -# # Lists.newArrayList("-reference", -# # referenceSampleName, -# # "-tumor", -# # tumorSampleName, -# # "-ref_wgs_metrics_file", -# # referenceMetricsPath, -# # "-tum_wgs_metrics_file", -# # tumorMetricsPath, -# # "-ref_flagstat_file", -# # referenceFlagstatPath, -# # "-tum_flagstat_file", -# # tumorFlagstatPath, -# # "-purple_dir", -# # purplePath, -# # "-output_dir", -# # outputPath)); -# -# output { -# -# } -# } +task HealthChecker { + # WIP + input { + String outputDir = "." 
+ String normalName + File normalFlagstats + File normalMetrics + String tumorName + File tumorFlagstats + File tumorMetrics + Array[File]+ purpleOutput + + String javaXmx = "10G" + String memory = "11G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/health-checker:3.2" + } + + command { + set -e + mkdir -p ~{outputDir} + health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -reference ~{normalName} \ + -ref_flagstat_file ~{normalFlagstats} \ + -ref_wgs_metrics_file ~{normalMetrics} \ + -tumor ~{tumorName} \ + -tum_flagstat_file ~{tumorFlagstats} \ + -tum_wgs_metrics_file ~{tumorMetrics} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + + output { + File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The path the output will be written to.", category:"required"} + normalName: {description: "The name of the normal sample.", category: "required"} + normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + purpleOutput: {description: "The files from purple's output directory.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task Linx { input { From eac2b302158e412df419705eba39ebaeedc1c11f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Feb 2021 16:10:52 +0100 Subject: [PATCH 048/439] small adjustments --- bwa.wdl | 4 +++- gridss.wdl | 6 +++--- hmftools.wdl | 12 ++++++------ sambamba.wdl | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 203f0dde..e2393481 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -28,6 +28,7 @@ task Mem { String outputPrefix Boolean sixtyFour = false Boolean usePostalt = false + Boolean useSoftclippingForSupplementary = false Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 @@ -36,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -56,6 +57,7 @@ task Mem { mkdir -p "$(dirname ~{outputPrefix})" bwa mem \ -t ~{threads} \ + ~{if useSoftclippingForSupplementary then "-Y" else ""} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ ~{bwaIndex.fastaFile} \ ~{read1} \ diff --git a/gridss.wdl b/gridss.wdl index 9bafa6d6..0148fcf6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,9 +35,9 @@ task GRIDSS { String? 
normalLabel Int jvmHeapSizeGb = 30 - Int threads = 2 - Int timeMinutes = ceil(2880 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int threads = 4 + Int timeMinutes = ceil(5760 / threads) + 10 + String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5bad1dbe..90564060 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -181,13 +181,13 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ @@ -234,13 +234,13 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} diff --git a/sambamba.wdl b/sambamba.wdl index bb63f665..5284363e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 2792266fa2950ec9cbe15530374465a99c65a43a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 09:52:04 +0100 Subject: [PATCH 049/439] update versions, memory, etc --- bwa.wdl | 2 +- extractSigPredictHRD.wdl | 2 ++ gridss.wdl | 9 ++++++++- hmftools.wdl | 29 ++++++++++++++--------------- sambamba.wdl | 6 +++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index e2393481..faa4121a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 6aa5ff1d..69c41ef8 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -28,6 +28,7 @@ task ExtractSigPredictHRD { File snvIndelVcfIndex File svVcf File svVcfIndex + Boolean hg38 = false String memory = "8G" Int timeMinutes = 15 @@ -40,6 +41,7 @@ task ExtractSigPredictHRD { ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ + ~{if hg38 then "RG_38" else "RG_37"} } output { diff --git a/gridss.wdl b/gridss.wdl index 0148fcf6..98d730cf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,23 +33,28 @@ task GRIDSS { File? normalBam File? 
normalBai String? normalLabel + File? blacklistBed + File? repeatmaskerBed Int jvmHeapSizeGb = 30 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" gridss \ + -w . \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{"--blacklist " + blacklistBed} \ + ~{"--repeatmaskerbed " + repeatmaskerBed} ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -80,6 +85,8 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} + repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 90564060..e98ac7ba 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -115,7 +115,7 @@ task Cobalt { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1200 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } command { @@ -172,6 +172,8 @@ task GripssApplicationKt { input { File inputVcf String outputPath = "gripss.vcf.gz" + String tumorName + String normalName File referenceFasta File referenceFastaFai File 
referenceFastaDict @@ -182,13 +184,15 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ + -tumor ~{tumorName} \ + ~reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -235,12 +239,12 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} @@ -357,7 +361,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } command { @@ -455,13 +459,13 @@ task Purple { File referenceFastaFai File referenceFastaDict File driverGenePanel - File hotspots + File somaticHotspots Int threads = 1 Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } command { @@ -479,13 +483,8 @@ task Purple { -ref_genome ~{referenceFasta} \ -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ - -hotspots ~{hotspots} \ + 
-somatic_hotspots ~{somaticHotspots} \ -threads ~{threads} - - # TODO if shallow also the following: - #-highly_diploid_percentage 0.88 \ - #-somatic_min_total 100 \ - #-somatic_min_purity_spread 0.1 } output { @@ -587,7 +586,7 @@ task Sage { String javaXmx = "32G" String memory = "33G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } command { diff --git a/sambamba.wdl b/sambamba.wdl index 5284363e..b6ef5e9b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -34,8 +34,8 @@ task Flagstat { command { sambamba flagstat \ - -t ~{threads} \ - ~{inputBam} \ + -t ~{threads} \ + ~{inputBam} \ > ~{outputPath} } @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 943f9541ebc002ea576898067b7f220112cb79fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 13:56:15 +0100 Subject: [PATCH 050/439] fix parameter_meta purple --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index e98ac7ba..3fe845a6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,7 +543,7 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 8283c5099ba6fad50b34043033380e2898d3db66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 18 Feb 2021 11:03:27 +0100 Subject: [PATCH 051/439] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 98d730cf..b4b36b01 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -54,7 +54,7 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} + ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz From adc3523872df29405e1741eaa2dfa2a67e61a51d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 11:00:46 +0100 Subject: 
[PATCH 052/439] fix sage --- hmftools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3fe845a6..49e4eeb4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,9 +590,7 @@ task Sage { } command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ + SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ From 42f6cd2a9c38ba2da8f07db2f7df17b70d99a5d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 Feb 2021 10:27:48 +0100 Subject: [PATCH 053/439] fix purple output for newer version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 49e4eeb4..31330a7d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Purple { } output { - File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -512,7 +512,7 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" - Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From 8623c57dbca49543e4a5ee8108316ef46242bcde Mon Sep 
17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:05:13 +0100 Subject: [PATCH 054/439] add circos configs to purple output --- hmftools.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 31330a7d..2fad41fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -512,6 +512,17 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" + File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" + File circosGaps = "~{outputDir}/circos/gaps.txt" + File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, @@ -519,6 +530,7 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + Array[File] circos = [] } runtime { From 13967b1793fc585d9f3753d87b618fd2c6819736 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:13:06 +0100 Subject: [PATCH 055/439] add array for circos confs --- hmftools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/hmftools.wdl b/hmftools.wdl index 2fad41fe..dc31f41b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -513,7 +513,7 @@ task Purple { File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" - File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" @@ -530,7 +530,9 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] - Array[File] circos = [] + Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, + circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, + circosSnp] } runtime { From 359456efd96ccd2326657e5dec543c5a73efd92c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Mar 2021 17:07:41 +0100 Subject: [PATCH 056/439] increase time and memory for picard collectWgsMetrics --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8dc4e0bf..aefb4c21 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? 
coverageCap - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "33G" + String javaXmx = "32G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } From 5db3dd912fbf3b8cdaefefe198a59e998ebdd89a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 11:15:41 +0100 Subject: [PATCH 057/439] update memory and timeMinutes for cutadapt and bwa --- bwa.wdl | 4 ++-- cutadapt.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index faa4121a..cc8ea0c6 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..bca29db3 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -84,7 +84,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } From 2aba7899cdf1a76d2afa089e230335bf0843b72c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 12:59:54 +0100 Subject: [PATCH 058/439] increase memory bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index cc8ea0c6..670f00d2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From f83b315ebb5318147ce3f08d8ba0d313146753d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 30 Mar 2021 09:55:44 +0200 Subject: [PATCH 059/439] add more memory to sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b6ef5e9b..b4eca66b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. 
Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize + # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0862eab38451da3de6615ad419ea56402690e7a0 Mon Sep 17 00:00:00 2001 From: dcats Date: Mon, 12 Apr 2021 16:50:33 +0200 Subject: [PATCH 060/439] memory and runtime adjustements --- bcftools.wdl | 4 ++-- gridss.wdl | 4 ++-- hmftools.wdl | 10 +++++----- sambamba.wdl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 4827a631..28b62696 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "256M" - Int timeMinutes = 10 + ceil(size(inputFile, "G")) + String memory = "5G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/gridss.wdl b/gridss.wdl index b4b36b01..11014a88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -36,7 +36,7 @@ task GRIDSS { File? blacklistBed File? 
repeatmaskerBed - Int jvmHeapSizeGb = 30 + Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" @@ -70,7 +70,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 1}G" + memory: "~{jvmHeapSizeGb + 25}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index dc31f41b..553879f9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "33G" - String javaXmx = "32G" + String memory = "52G" + String javaXmx = "50G" Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -341,7 +341,7 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta + File referenceFasta #FIXME Not used in pipeline5? File referenceFastaFai File referenceFastaDict String refGenomeVersion @@ -597,8 +597,8 @@ task Sage { File? coverageBed Int threads = 2 - String javaXmx = "32G" - String memory = "33G" + String javaXmx = "50G" + String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } diff --git a/sambamba.wdl b/sambamba.wdl index b4eca66b..c8d9e11c 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -69,7 +69,7 @@ task Markdup { String outputPath Int compressionLevel = 1 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. - Int sortBufferSize = 2048 + Int sortBufferSize = 4096 Int ioBufferSize = 128 Boolean removeDuplicates = false @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 2024 mb as a margin of safety. 
Real life use with this setting uses 2.7 GiB. - Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize + # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 22933762f7683b98535da38de2954db41c44be37 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:31:58 +0200 Subject: [PATCH 061/439] add germline options to purple --- hmftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 553879f9..e8b60bc0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -452,6 +452,7 @@ task Purple { Array[File]+ cobaltOutput File gcProfile File somaticVcf + File germlineVcf File filteredSvVcf File fullSvVcf File fullSvVcfIndex @@ -460,6 +461,7 @@ task Purple { File referenceFastaDict File driverGenePanel File somaticHotspots + File germlineHotspots Int threads = 1 Int timeMinutes = 60 @@ -477,6 +479,7 @@ task Purple { -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ + -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ @@ -484,6 +487,7 @@ task Purple { -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ -somatic_hotspots ~{somaticHotspots} \ + -germline_hotspots ~{germlineHotspots} \ -threads ~{threads} } @@ -550,6 +554,7 @@ task Purple { cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} somaticVcf: {description: "The somatic variant calling results.", category: "required"} + germlineVcf: {description: "The germline 
variant calling results.", category: "required"} filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} @@ -557,7 +562,8 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} + germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0013a03155aed7748864308f9fda5b4f07d79706 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:37:54 +0200 Subject: [PATCH 062/439] remove ref_genome from Linx --- hmftools.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index e8b60bc0..1a99caf6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,9 +341,6 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta #FIXME Not used in pipeline5? 
- File referenceFastaFai - File referenceFastaDict String refGenomeVersion String outputDir = "./linx" File fragileSiteCsv @@ -369,7 +366,6 @@ task Linx { -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ From bf43886539cb8d40d5b9637e3920ffba8d5f80a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:41:07 +0200 Subject: [PATCH 063/439] remove unused parameter_meta --- hmftools.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1a99caf6..48c6099c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -414,9 +414,6 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The files produced by PURPLE.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"HG19\" or \"HG38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} From a4d5102d42edf0d7d5795f5860817b38e680e597 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 14 Apr 2021 13:27:26 +0200 Subject: [PATCH 064/439] add gridss properties --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 11014a88..ef5ae9e5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,6 +35,7 @@ task GRIDSS { String? normalLabel File? blacklistBed File? repeatmaskerBed + File? gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 @@ -50,9 +51,10 @@ task GRIDSS { --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ + ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ @@ -87,6 +89,7 @@ task GRIDSS { normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} + gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} From e81de32b4db6b48ff458f368b253010bcbff7187 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 15 Apr 2021 11:50:41 +0200 Subject: [PATCH 065/439] upgrade sage version --- 
hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 48c6099c..0a566d8e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -599,7 +599,7 @@ task Sage { String javaXmx = "50G" String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } command { From 51e524a7fa1ffe7664882941e7fc0ffc7aa14ad3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 20 Apr 2021 12:25:16 +0200 Subject: [PATCH 066/439] add missing purple outputs, fix typo --- hmftools.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0a566d8e..3dd52daf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -192,13 +192,13 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - ~reference ~{normalName} \ + -reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -486,6 +486,7 @@ task Purple { output { File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" + File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -497,6 +498,8 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = 
"~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" @@ -524,7 +527,7 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, From 32c08100bcbf0590d7c1d69e08cdae2e3c640e99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 May 2021 14:16:16 +0200 Subject: [PATCH 067/439] adjust runtime settings --- bcftools.wdl | 4 ++-- bwa.wdl | 4 ++-- extractSigPredictHRD.wdl | 4 ++-- gridss.wdl | 6 +++--- hmftools.wdl | 43 ++++++++++++++++++++-------------------- picard.wdl | 6 +++--- sambamba.wdl | 2 +- samtools.wdl | 2 ++ snpeff.wdl | 6 +++--- 9 files changed, 39 insertions(+), 38 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28b62696..8fab933a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "5G" - Int timeMinutes = 60 + ceil(size(inputFile, "G")) + String memory = "1G" + Int timeMinutes = 30 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index 670f00d2..1cb170b7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 69c41ef8..2b5d9781 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,8 +30,8 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "8G" - Int timeMinutes = 15 + String memory = "3G" + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git a/gridss.wdl b/gridss.wdl index ef5ae9e5..acafc911 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 4 - Int timeMinutes = ceil(5760 / threads) + 10 + Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -72,7 +72,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 25}G" + memory: "~{jvmHeapSizeGb + 15}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -117,7 +117,7 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) } command { diff --git a/hmftools.wdl b/hmftools.wdl index 3dd52daf..9b22c10d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "52G" String javaXmx = "50G" - Int timeMinutes = 1200 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -112,9 +112,9 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 1200 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } @@ -181,9 +181,9 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = 
"33G" + String javaXmx = "32G" + Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -236,9 +236,9 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -274,7 +274,6 @@ task GripssHardFilterApplicationKt { } task HealthChecker { - # WIP input { String outputDir = "." String normalName @@ -285,9 +284,9 @@ task HealthChecker { File tumorMetrics Array[File]+ purpleOutput - String javaXmx = "10G" - String memory = "11G" - Int timeMinutes = 10 + String javaXmx = "2G" + String memory = "1G" + Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -355,9 +354,9 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 30 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } @@ -457,9 +456,9 @@ task Purple { File germlineHotspots Int threads = 1 - Int timeMinutes = 60 - String memory = "13G" - String javaXmx = "12G" + Int timeMinutes = 30 + String memory = "9G" + String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } @@ -600,8 +599,8 @@ task Sage { Int threads = 2 String javaXmx = "50G" - String memory = "75G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String memory = "60G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } diff --git a/picard.wdl b/picard.wdl index aefb4c21..9a935045 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { 
Int? minimumBaseQuality Int? coverageCap - String memory = "33G" - String javaXmx = "32G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } diff --git a/sambamba.wdl b/sambamba.wdl index c8d9e11c..e78f50b6 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..954b5d4e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -27,6 +27,7 @@ task BgzipAndIndex { String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -55,6 +56,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/snpeff.wdl b/snpeff.wdl index 85709079..4a3640c7 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,9 +36,9 @@ task SnpEff { Boolean noShiftHgvs = false Int? 
upDownStreamLen - String memory = "50G" - String javaXmx = "49G" - Int timeMinutes = 60 #FIXME + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" } From ffda341fae7bc7cc519451b018e43a76cae34d8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 May 2021 14:37:56 +0200 Subject: [PATCH 068/439] adjust runtime settings --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8fab933a..059cc39d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "1G" - Int timeMinutes = 30 + ceil(size(inputFile, "G")) + String memory = "2G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c3df943f2964d2d5551baaf64c9bb2e2d9c198bf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 25 May 2021 13:01:25 +0200 Subject: [PATCH 069/439] update memory bcftools --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 059cc39d..5170a01f 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "2G" + String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From de03877e2e831285daaccc820db98da0897e1dac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Jun 2021 13:53:25 +0200 Subject: [PATCH 070/439] add cuppa and cuppa chart --- hmftools.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9b22c10d..779820a3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,112 @@ task Cobalt { } } +task Cuppa { + input { + Array[File]+ linxOutput + Array[File]+ purpleOutput + String sampleName + Array[String]+ categories = ["DNA"] + Array[File]+ referenceData + File purpleSvVcf + File purpleSvVcfIndex + File purpleSomaticVcf + File purpleSomaticVcfIndex + String outputDir = "./cuppa" + + String javaXmx = "4G" + String memory = "5G" + Int time_minutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p sampleData ~{outputDir} + ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + cuppa -Xmx~{javaXmx} \ + -output_dir ~{outputDir} \ + -output_id ~{sampleName} \ + -categories '~{sep="," categories}' \ + -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ + -sample_data_dir sampleData \ + -sample_data ~{sampleName} \ + -sample_sv_file ~{purpleSvVcf} \ + -sample_somatic_vcf ~{purpleSomaticVcf} + } + + output { + File cupData = "~{outputDir}/~{sampleName}.cup.data.csv" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + linxOutput: {description: "The files produced by linx.", category: "required"} + purpleOutput: {description: "The files produced by purple.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + categories: {description: "The 
classifiers to use.", category: "advanced"} + referenceData : {description: "The reference data.", category: "required"} + purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} + purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"} + purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} + purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CuppaChart { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "4G" + Int time_minutes = 5 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p ~{outputDir} + cuppa-chart \ + -sample ~{sampleName} + -sample_data ~{cupData} + -output_dir ~{outputDir} + } + + output { + File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png" + File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category:"common"} + cupData: {description: "The cuppa output.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category:"common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { input { File inputVcf From c0477edfd5904f1de11d7ea0d60e8b65e36e0bed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Jun 2021 10:25:47 +0200 Subject: [PATCH 071/439] fix typo --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 779820a3..8beb5c76 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -183,7 +183,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" - Int time_minutes = 10 + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.4" } @@ -239,7 +239,7 @@ task CuppaChart { String outputDir = "./cuppa" String memory = "4G" - Int time_minutes = 5 + Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.4" } From 1be4badcf451ccad2d2198dbfec4d97aaf68af45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 21 Jun 2021 11:54:52 +0200 Subject: [PATCH 072/439] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8beb5c76..868d03fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "52G" - String javaXmx = "50G" + String memory = "70G" + String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 7a693a69f9a59755d527d733946406eed3a2f124 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 11:38:04 +0200 Subject: [PATCH 073/439] remove rainfall plot output --- hmftools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 868d03fe..8e60351b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -615,7 +615,6 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -634,7 +633,7 @@ task Purple { purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + segmentPlot, somaticClonalityPlot, somaticPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 664325fc50d19e074d80780cae322157f07035ed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 13:34:07 +0200 Subject: [PATCH 074/439] fix missing backslashes --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8e60351b..1b9d8d22 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -247,8 +247,8 @@ task CuppaChart { set -e mkdir -p ~{outputDir} cuppa-chart \ - -sample ~{sampleName} - -sample_data ~{cupData} + -sample ~{sampleName} \ + -sample_data ~{cupData} \ -output_dir ~{outputDir} } From 545f63af658df8fc515672589a7bfb7e81ed2be3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 11:36:52 +0200 Subject: [PATCH 075/439] update some version and add repeatmasker annotation for gridss --- gridss.wdl | 123 +++++++++++++++++++++++++++++++-------------------- hmftools.wdl | 9 ++-- 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index acafc911..3844c602 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -22,6 
+22,61 @@ version 1.0 import "bwa.wdl" as bwa +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + File viralReferenceFai + File viralReferenceDict + File viralReferenceImg + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + } + + command { + AnnotateInsertedSequence -Xmx~{javaXmx} \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' \ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam @@ -34,13 +89,12 @@ task GRIDSS { File? normalBai String? normalLabel File? blacklistBed - File? repeatmaskerBed File? gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } command { @@ -56,7 +110,6 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -88,7 +141,6 @@ task GRIDSS { normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} - repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} @@ -104,64 +156,37 @@ task GRIDSS { } } -task AnnotateInsertedSequence { +task GridssAnnotateVcfRepeatmasker { input { - File inputVcf - String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceImg + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - Int threads = 8 - String javaXmx = "8G" - String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + String memory = "4G" + String dockerImage = 
"quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) } command { - java -Xmx~{javaXmx} \ - -Dsamjdk.create_index=true \ - -Dsamjdk.use_async_io_read_samtools=true \ - -Dsamjdk.use_async_io_write_samtools=true \ - -Dsamjdk.use_async_io_write_tribble=true \ - -Dsamjdk.buffer_size=4194304 \ - -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ - gridss.AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ - INPUT=~{inputVcf} \ - OUTPUT=~{outputPath} \ - ALIGNMENT=APPEND \ - WORKING_DIR='.' \ - WORKER_THREADS=~{threads} + gridss_annotate_vcf_repeatmasker + --output ~{outputPath} \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -w . \ + ~{gridssVcf} } output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File annotatedVcf = outputPath + File annotatedVcfIndex = "~{outputPath}.tbi" } runtime { - cpu: threads - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF file.", category: "required"} - outputPath: {description: "The path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} - + gridssVcf: {description: "The GRIDSS output.", category: "required"} + gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: 
{description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} +} \ No newline at end of file diff --git a/hmftools.wdl b/hmftools.wdl index 1b9d8d22..7d6f1547 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -304,7 +304,8 @@ task GripssApplicationKt { -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} \ + -paired_normal_tumor_ordinals } output { @@ -463,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" } command { @@ -565,7 +566,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" } command { @@ -706,7 +707,7 @@ task Sage { String javaXmx = "50G" String memory = "60G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } command { From 04c65ab38a2d91051e3c0aa90c67738b755a4921 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 15:29:21 +0200 Subject: [PATCH 076/439] add virusbreakend --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 
3844c602..52e039d1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,7 +164,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) + Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) } command { @@ -181,6 +181,12 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} @@ -189,4 +195,57 @@ task GridssAnnotateVcfRepeatmasker { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} + +task Virusbreakend { + input { + File bam + File bamIndex + File referenceFasta + File virusbreakendDB + String outputPath = "./virusbreakend.vcf" + + String memory = "75G" + Int threads = 8 + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 180 + } + + command { + mkdir virusbreakenddb + tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 + virusbreakend \ + --output ~{outputPath} \ + --workingdir . 
\ + --reference ~{referenceFasta} \ + --db virusbreakenddb \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -t ~{threads} \ + ~{bam} + } + + output { + File vcf = outputPath + File summary = "~{outputPath}.summary.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + bam: {description: "A BAM file.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + threads: {description: "The number of the threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From c2f223eb6a487d7c5bca957bdaaf830d0522d3cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 13:26:01 +0200 Subject: [PATCH 077/439] add virusinterpreter --- hmftools.wdl | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 7d6f1547..f1617bbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -778,3 +778,54 @@ task Sage { category: "advanced"} } } + +task VirusInterpreter { + input { + String sampleId + File virusBreakendTsv + File taxonomyDbTsv + File virusInterpretationTsv + File virusBlacklistTsv + String outputDir = "." 
+ + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + } + + command { + virus-interpreter -Xmx~{javaXmx} \ + -sample_id ~{sampleId} \ + -virus_breakend_tsv ~{virusBreakendTsv} \ + -taxonomy_db_tsv ~{taxonomyDbTsv} \ + -virus_interpretation_tsv ~{virusInterpretationTsv} \ + -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -output_dir ~{outputDir} + } + + output { + File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name of the sample.", category: "required"} + virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} + taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} + virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} + virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From f169d78589c3e4d2a97892cfc3fb685d6c217d6c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 16:02:28 +0200 Subject: [PATCH 078/439] add protect --- hmftools.wdl | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index f1617bbe..646d01ea 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -542,6 +542,101 @@ task Linx { } } +task Protect { + input { + String refGenomeVersion + String tumorName + String normalName + Array[String]+ sampleDoids + String outputDir = "." + Array[File]+ serveActionability + File doidsJson + File purplePurity + File purpleQc + File purpleDriverCatalogSomatic + File purpleDriverCatalogGermline + File purpleSomaticVariants + File purpleSomaticVariantsIndex + File purpleGermlineVariants + File purpleGermlineVariantsIndex + File purpleGeneCopyNumber + File linxFusion + File linxBreakend + File linxDriversCatalog + File chordPrediction + File annotatedVirus + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biowdl/protect:v1.4" + } + + command { + protect -Xmx~{javaXmx} \ + -ref_genome_version ~{refGenomeVersion} \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{normalName} \ + -primary_tumor_doids ~{sep=";" sampleDoids} \ + -output_dir ~{outputDir} \ + -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -doid_json ~{doidsJson} \ + -purple_purity_tsv ~{purplePurity} \ + -purple_qc_file ~{purpleQc} \ + -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ + -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariants} \ + -purple_germline_variant_vcf ~{purpleGermlineVariants} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \ + -linx_fusion_tsv 
~{linxFusion} \ + -linx_breakend_tsv ~{linxBreakend} \ + -linx_driver_catalog_tsv ~{linxDriversCatalog} \ + -chord_prediction_txt ~{chordPrediction} \ + -annotated_virus_tsv ~{annotatedVirus} + } + + output { + File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} + doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + purplePurity: {description: "The purity file generated by purple.", category: "required"} + purpleQc: {description: "The QC file generated by purple.", category: "required"} + purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} + purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"} + purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"} + purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"} + purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"} + purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"} + 
purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"} + linxFusion: {description: "The fusion file generated by linx.", category: "required"} + linxBreakend: {description: "The breakend file generated by linx.", category: "required"} + linxDriversCatalog: {description: "The driver catalog generated generated by linx.", category: "required"} + chordPrediction: {description: "The chord prediction file.", category: "required"} + annotatedVirus: {description: "The virus-interpreter output.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName From 3c92beac7d694209332b66e6869c7c7b6a3ea885 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Jul 2021 12:28:31 +0200 Subject: [PATCH 079/439] remove tabix from gridss --- gridss.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 52e039d1..6c8899e4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -112,7 +112,6 @@ task GRIDSS { ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ ~{tumorBam} - tabix -p vcf ~{outputPrefix}.vcf.gz samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai } From 44a70a394df432fe678a0fa82ef015acf3e5c6d7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 09:42:45 +0200 Subject: [PATCH 080/439] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 6c8899e4..f9a92f56 100644 --- 
a/gridss.wdl +++ b/gridss.wdl @@ -167,7 +167,7 @@ task GridssAnnotateVcfRepeatmasker { } command { - gridss_annotate_vcf_repeatmasker + gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ From 7c5ce8c031f34744f9759e59b2617113120a40be Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 10:50:50 +0200 Subject: [PATCH 081/439] set default timeMinutes GridssAnnotateVcfRepeatmasker to 120 --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f9a92f56..02f32297 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) + Int timeMinutes = 120 } command { From 497f12a7446dc80873a66fa00db1c9bbc0eece99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:47:29 +0200 Subject: [PATCH 082/439] adjust repeatmasker time --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 02f32297..db20a203 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 120 + Int timeMinutes = 1440 } command { From 7f4433f50b5ef8deaeb1d86beaaaae5ff07bae41 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 15:25:28 +0200 Subject: [PATCH 083/439] fix missing memory runtime BgzipAndIndex --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index d34df51e..c8837d94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -46,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } From 477f00f57a1bf445672da7b7be7ed999e6230e93 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 
Jul 2021 09:13:07 +0200 Subject: [PATCH 084/439] increase time for GridssAnnotateVcfRepeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index db20a203..f137f968 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1440 + Int timeMinutes = 2880 } command { From 4a32a443a29e324b8b01fac1fdbc01a7f2078f79 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:39:47 +0200 Subject: [PATCH 085/439] increase memory repeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f137f968..ad230d05 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -161,7 +161,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "4G" + String memory = "50G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } From 9d3b5a556bd642d8dc8d098694497a5a3b1950fb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:46:34 +0200 Subject: [PATCH 086/439] add threads to repeatmasker --- gridss.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index ad230d05..069d6953 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -162,6 +162,7 @@ task GridssAnnotateVcfRepeatmasker { String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" String memory = "50G" + Int threads = 4 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } @@ -171,6 +172,7 @@ task GridssAnnotateVcfRepeatmasker { --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . 
\ + -t ~{threads} \ ~{gridssVcf} } @@ -180,6 +182,7 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage @@ -189,6 +192,7 @@ task GridssAnnotateVcfRepeatmasker { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From c80402130bdb7471e8f37fece8cb643625a0df02 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 09:58:51 +0200 Subject: [PATCH 087/439] fix Xmx in AnnotateInsertedSequence --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 069d6953..aedac9ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,9 @@ task AnnotateInsertedSequence { } command { - AnnotateInsertedSequence -Xmx~{javaXmx} \ + set -e + _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ @@ -215,6 +217,7 @@ task Virusbreakend { } command { + set -e mkdir virusbreakenddb tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 virusbreakend \ From 28b1a835d558d8ecd60682e9105731b6762f4c30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:32:44 +0200 Subject: [PATCH 088/439] fix wrong placeholder --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/gridss.wdl b/gridss.wdl index aedac9ab..66e27ff0 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -40,7 +40,7 @@ task AnnotateInsertedSequence { command { set -e - _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From d28a2a529ede9ffc89b18628cc012c846354e096 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:53:24 +0200 Subject: [PATCH 089/439] typo --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 66e27ff0..fcfed095 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,12 +35,12 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + Int timeMinutes = 120 } command { set -e - _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From 572114885be2bd0243ac59898c223fbf954e1510 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:40:29 +0200 Subject: [PATCH 090/439] update gripss version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 646d01ea..9dc78dd8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = 
"quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { From f62a7424b88a1de1e6c1791aeff7c020a60939cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:42:20 +0200 Subject: [PATCH 091/439] fix gripss version in command --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 9dc78dd8..6a086d37 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -295,7 +295,7 @@ task GripssApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ -reference ~{normalName} \ @@ -351,7 +351,7 @@ task GripssHardFilterApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} From e470f59fa587bef9dd075eb28ba6317be89a8416 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 10:01:09 +0200 Subject: [PATCH 092/439] update purple and gripss versions --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6a086d37..8c38c501 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = 
"3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -661,7 +661,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" } command { From c47163aa1c9d67b5d675444d06afe36e5ee31ec9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 13:24:47 +0200 Subject: [PATCH 093/439] change docker image for purple --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8c38c501..bf79070e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -661,7 +661,8 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" + # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" } command { From ed6061d1671ba091992248375e613daf57fd544d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 14:20:38 +0200 Subject: [PATCH 094/439] fix linx output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index bf79070e..a327fd0b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Linx { } output { - File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" File linxDrivers = 
"~{outputDir}/~{sampleName}.linx.drivers.tsv" From ab17de947e0509b853a60e87e80399e1ca83f826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 13:37:37 +0200 Subject: [PATCH 095/439] add task for peach --- peach.wdl | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 peach.wdl diff --git a/peach.wdl b/peach.wdl new file mode 100644 index 00000000..9321d6bf --- /dev/null +++ b/peach.wdl @@ -0,0 +1,77 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Peach { + input { + File transcriptTsv + File germlineVcf + File germlineVcfIndex + File tumorName + File normalName + String outputDir = "./peach" + File panelJson + + String memory = "8G" + String dockerImage = "quay.io/biowdl/peach:v1.0" + Int timeMinutes = 20 + } + + command { + peach \ + --recreate_bed \ + --transcript_tsv ~{transcriptTsv} \ + ~{germlineVcf} \ + ~{tumorName} \ + ~{normalName} \ + 1.0 \ + ~{outputDir} \ + ~{panelJson} \ + vcftools + } + + output { + File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" + File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" + File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" + Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} + germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} + germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample", category: "required"} + outputDir: {description: "The directory the ouput should be written to.", category: "required"} + panelJson: {description: "A JSON describing the panel.", category: "required"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 1648c818b856f22ed9e7c8b6443d2e9bc072eb6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 14:21:41 +0200 Subject: [PATCH 096/439] rename array output peach --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 9321d6bf..72c7fde6 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] } runtime { From dcafd29087866bfa4bc464e9fd301e8de234c138 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 15:20:03 +0200 Subject: [PATCH 097/439] fix validation issues --- peach.wdl | 2 +- samtools.wdl | 2 -- scripts | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/peach.wdl b/peach.wdl index 72c7fde6..5e0746aa 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] } runtime { diff --git a/samtools.wdl b/samtools.wdl index 7eb86351..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -28,7 +28,6 @@ task BgzipAndIndex { String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -59,7 +58,6 @@ task BgzipAndIndex { type: {description: "The type of file (eg. 
vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/scripts b/scripts index 84690a30..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From dce31f572b08b3ef1ff3209f101ec4e3e838646c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 29 Jul 2021 12:05:22 +0200 Subject: [PATCH 098/439] update linx version --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index a327fd0b..1e25938d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -464,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" } command { From 09d899b85aec47bcb065cb8b584e703828d488e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Aug 2021 13:27:41 +0200 Subject: [PATCH 099/439] add bedtools coverage --- bedtools.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 3dbf93cb..f8713d2e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,55 @@ task Complement { } } +task Coverage { + input { + File genomeFile + File a + File? aIndex + File b + File? 
bIndex + String outputPath = "./coverage.tsv" + + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools coverage \ + -sorted \ + -g ~{genomeFile} \ + -a ~{a} \ + -b ~{b} \ + -d \ + > ~{outputPath} + } + + output { + File coverageTsv = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + genomeFile: {description: "A file listing the chromosomes and their lengths.", category: "required"} + a: {description: "The file containing the regions for which the coverage will be counted.", category: "required"} + aIndex: {description: "An index for the file given as `a`.", category: "common"} + b: {description: "The file in which the coverage will be counted. Likely a BAM file.", category: "required"} + bIndex: {description: "An index for the file given as `b`.", category: "common"} + outputPath: {description: "The path the ouptu will be written to.", category: "common"} + + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} + task Merge { input { File inputBed From 70cda88f96eecabb9b9a8d5f75f88515c0840a8f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 18 Aug 2021 16:00:25 +0200 Subject: [PATCH 100/439] add deconstructSigs task --- deconstructsigs.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 deconstructsigs.wdl diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl new file mode 100644 index 00000000..ef47e3e3 --- /dev/null +++ b/deconstructsigs.wdl @@ -0,0 +1,66 @@ +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +version 1.0 + +task DeconstructSigs { + input { + File signaturesMatrix + File signaturesReference + String outputPath = "./signatures.rds" + + Int timeMinutes = 15 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" + } + + command { + R --no-echo << EOF + library(deconstructSigs) + tumor <- read.table("~{signaturesMatrix}", check.names=F) + ref <- data.frame(t(read.table("~{signaturesReference}", check.names=F, header=T, row.names="Type")), check.names=F) + tumor <- tumor[,colnames(ref)] + + sigs <- whichSignatures(tumor.ref=tumor, row.names(tumor), signatures.ref=ref, contexts.needed=T) + saveRDS(sigs, "~{outputPath}") + EOF + } + + output { + File signatureRDS = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + signaturesMatrix: {description: "A table containing columns represtenting mutation types (matching the types in the signatures reference) and one row with the counts for each of these types for the sample of intrest.", + category: "required"} + signaturesReference: {description: "A table describing the mutational signatures, formatted like those provided by COSMIC.", + category: "required"} + outputPath: {description: "The location the output will be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 26574bf26bef2663e9a67fe99c2a241762eb4365 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 20 Aug 2021 13:43:07 +0200 Subject: [PATCH 101/439] update bedtools version for coverage --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index f8713d2e..1d956cab 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -77,7 +77,7 @@ task Coverage { String memory = "8G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } command { From e8df466dfba91be4e2c08e9fa57607ad48936d01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Aug 2021 12:11:39 +0200 Subject: [PATCH 102/439] fix incorrect type --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index 5e0746aa..b57842f7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -25,8 +25,8 @@ task Peach { File transcriptTsv File germlineVcf File germlineVcfIndex - File tumorName - File normalName + String tumorName + String normalName String outputDir = "./peach" File panelJson From d76faa5a05528e6a74488b46a18bdfcd1a9402ea Mon Sep 17 00:00:00 2001 From: cedrick Date: Fri, 17 Sep 2021 09:55:37 +0200 Subject: [PATCH 103/439] update bcftools.wdk --- bcftools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 0cbfdefd..0738d156 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,7 @@ task View { input { File inputFile String outputPath = "output.vcf" - + Boolean excludeUncalled = false String? exclude String? 
include String memory = "256M" @@ -304,8 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{"--include " + include} \ ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,8 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 15b12101e04df8d842f68cb5ddef7f7f8a932a9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 14:55:43 +0200 Subject: [PATCH 104/439] fix protect command with multiple doids --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1e25938d..199d7d88 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -577,7 +577,7 @@ task Protect { -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{normalName} \ - -primary_tumor_doids ~{sep=";" sampleDoids} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ -doid_json ~{doidsJson} \ From a7a504e4a3589787d8c25c5ca97149598b65f572 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 17:01:47 +0200 Subject: [PATCH 105/439] adjust resource settings --- gridss.wdl | 8 ++++---- hmftools.wdl | 4 ++-- peach.wdl | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index fcfed095..03193cca 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 64 - Int threads = 4 + Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } @@ -163,10 +163,10 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "50G" - Int threads = 4 + String memory = "25G" + Int threads = 8 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 2880 + Int timeMinutes = 1440 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 199d7d88..f8b13c66 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -799,10 +799,10 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 2 + Int threads = 4 String javaXmx = "50G" String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } diff --git a/peach.wdl b/peach.wdl index b57842f7..af44daec 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,9 +30,9 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "8G" + String memory = "2G" String dockerImage = "quay.io/biowdl/peach:v1.0" - Int timeMinutes = 20 + Int timeMinutes = 5 } command { From 9d5972de8bd3cb4e0766a78461a989f878f88999 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 30 Sep 2021 11:44:16 +0200 Subject: [PATCH 106/439] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3dbc7f6..71df5def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.0.1 --------------------------- ++ Smoove: enable genotyping ++ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. 
version 5.0.0 From 48f0c3ebf543b0c2e707c73fb00bdafe308a4395 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 1 Oct 2021 13:26:07 +0200 Subject: [PATCH 107/439] update changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71df5def..a6df9307 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.1 --------------------------- + Smoove: enable genotyping -+ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. version 5.0.0 From c48f3bb7078e52bbb653848857028ddc9d43a6de Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Oct 2021 12:32:49 +0200 Subject: [PATCH 108/439] increase memory for sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index e78f50b6..4c2115e0 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize + # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0932a62d6a00e5c600fcda7c3fa3a7aec40638bb Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:52:01 -0600 Subject: [PATCH 109/439] Update chunked-scatter.wdl Older container is not producing the necessary bed file --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index fba1af5a..66954c36 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -86,7 +86,7 @@ task ScatterRegions { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From 8224e2cb52132a7978db5760afa813d640d2bb74 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:55:46 -0600 Subject: [PATCH 110/439] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6df9307..6d40cd1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> +version 5.0.2 +--------------------------- ++ bumped ScatterRegions container to 1.0.0 + version 5.0.1 --------------------------- + Smoove: enable genotyping From 9e868dbcfbd4374ef6e04fbe389bf550be67a6ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Nov 2021 14:26:37 +0100 Subject: [PATCH 111/439] add img input for virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 03193cca..b36d6598 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -207,6 +207,7 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" @@ -246,6 +247,7 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0a1995df4f853799cb945a2bc8d3ac0062039efd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Nov 2021 12:29:10 +0100 Subject: [PATCH 112/439] try version 2.11.1 for gridss --- gridss.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b36d6598..5c267e79 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 120 } @@ -96,7 
+96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 1440 } @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 180 } From 7d1f9c92406f9865e8c035a5bd19feea5a22b7ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 10 Nov 2021 12:46:04 +0100 Subject: [PATCH 113/439] upgrade gridss to 2.12.2 --- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 5c267e79..1f14e23b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 120 } @@ -96,7 +96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" } command { @@ -165,14 +165,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 1440 } command { gridss_annotate_vcf_repeatmasker 
\ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -w . \ -t ~{threads} \ ~{gridssVcf} @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 180 } @@ -226,7 +226,7 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -t ~{threads} \ ~{bam} } From f9ed6158bfe70792d546e8e68b205f197c52b2ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Nov 2021 10:59:46 +0100 Subject: [PATCH 114/439] increase memory gridss --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 1f14e23b..2e68ed88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 64 + Int jvmHeapSizeGb = 85 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From c9657636bed7c7046e3799a0c3fca36473ae80e6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 10:07:54 +0100 Subject: [PATCH 115/439] increase gridss memory --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 2e68ed88..13596a48 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 85 + Int jvmHeapSizeGb = 185 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From f64bd5367fee90d51d47db7c29af13816c9fedbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 16:08:53 +0100 Subject: [PATCH 116/439] use alternative gridss image for virusbreakend --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 13596a48..b448a2dc 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 180 } From 1a9a8058f3991c0b76e934837dc64f80805fc4c6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 21:55:59 +0100 Subject: [PATCH 117/439] change gridss runtime settings --- gridss.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b448a2dc..d93f1b80 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -94,9 +94,9 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 185 - Int threads = 8 - Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + Int threads = 4 + Int timeMinutes = ceil(7200 / threads) + 180 + String dockerImage = "quay.io/biowdl/gridss:2.12.2" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 } From d3d2040093a79814a1bf0488d13a44342068c5b5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Sat, 13 Nov 2021 16:12:52 +0100 Subject: [PATCH 118/439] gridss more memory --- gridss.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index d93f1b80..8c05fe61 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,8 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 185 + Int jvmHeapSizeGb = 200 + Int nonJvmMemoryGb = 50 Int threads = 4 Int timeMinutes = ceil(7200 / threads) + 180 String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -126,7 +127,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 15}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } From e9f3c5fdf8aef7082911f6c40730264187cc6884 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 15 Nov 2021 11:55:52 +0100 Subject: [PATCH 119/439] make recovery sv vcf optional in purple --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f8b13c66..1537bce5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -648,8 +648,8 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf - File fullSvVcf - File fullSvVcfIndex + File? fullSvVcf + File? 
fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -676,7 +676,7 @@ task Purple { -somatic_vcf ~{somaticVcf} \ -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - -sv_recovery_vcf ~{fullSvVcf} \ + ~{"-sv_recovery_vcf " + fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ From 787ad56b36f24099ece60ae56a43af46cbbeaf00 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 26 Nov 2021 13:46:20 +0100 Subject: [PATCH 120/439] give dictionary and index to virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 8c05fe61..b43a3837 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -208,6 +208,8 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceFastaFai + File referenceFastaDict File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" From 86a249825272f9bb4384f87057593047402a1a37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 15:36:36 +0100 Subject: [PATCH 121/439] Add sampleposition in array task --- common.wdl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/common.wdl b/common.wdl index d29ed5da..fc8dc481 100644 --- a/common.wdl +++ b/common.wdl @@ -148,6 +148,43 @@ task CreateLink { } } +task GetSamplePositionInArray { + input { + Array[String] sampleIds + String sample + + # python:3.7-slim's sha256 digest. This image is based on debian buster. + String dockerImage = "python@sha256:e0f6a4df17d5707637fa3557ab266f44dddc46ebfc82b0f1dbe725103961da4e" + } + + command <<< + python <>> + + output { + Int position = read_int(stdout()) + } + + runtime { + # 4 gigs of memory to be able to build the docker image in singularity. 
+ memory: "4G" + docker: dockerImage + } + + parameter_meta { + # inputs + sampleIds: {description: "A list of sample ids.", category: "required"} + sample: {description: "The sample for which the position is wanted.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + position: {description: ""} + } +} + task MapMd5 { input { Map[String,String] map From d970e6892b1e61d34c99e507fb3a62b7b04f2fc1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 16:33:41 +0100 Subject: [PATCH 122/439] Require 5 minutes --- common.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/common.wdl b/common.wdl index fc8dc481..1e4fc8cb 100644 --- a/common.wdl +++ b/common.wdl @@ -172,6 +172,7 @@ task GetSamplePositionInArray { # 4 gigs of memory to be able to build the docker image in singularity. memory: "4G" docker: dockerImage + timeMinutes: 5 } parameter_meta { From c21d27ff32bdf7210dddf98a711e32192e820a82 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:48:24 +0100 Subject: [PATCH 123/439] Add parameter_meta for macs2 --- macs2.wdl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 757eaf67..cbce18e9 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -26,11 +26,10 @@ task PeakCalling { Array[File]+ inputBamsIndex Array[File]+? controlBams Array[File]+? 
controlBamsIndex - String outDir + String outDir = "macs2" String sampleName Boolean nomodel = false - Int threads = 1 String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -50,8 +49,21 @@ task PeakCalling { } runtime { - cpu: threads + cpu: 1 memory: memory docker: dockerImage } + parameter_meta { + inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "required"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + sampleName: {description: "Name of the sample to be analysed", category: "required"} + outDir: {description: "All output files will be written in this directory.", category: "advanced"} + nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } } From 24ef56348f4ca8900f639d05aa28ec25fda3fbd1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:52:15 +0100 Subject: [PATCH 124/439] Add time minutes parameter --- macs2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index cbce18e9..983630c5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName Boolean nomodel = false - + Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -52,6 +52,7 @@ task PeakCalling { cpu: 1 memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} From 17746ebbb5668b8382050105b69f33273019a512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:56:37 +0100 Subject: [PATCH 125/439] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d40cd1f..126f1ed9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.1.0-dev +--------------------------- ++ Update parameter_meta for macs2 ++ Add sample position in array task. 
+ version 5.0.2 --------------------------- + bumped ScatterRegions container to 1.0.0 From 019cbb96a68c2fca141c955126b0ad9b97511f2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 16:00:30 +0100 Subject: [PATCH 126/439] More correct evaluation of controlBams input --- macs2.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 983630c5..eb71ac1d 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -24,8 +24,8 @@ task PeakCalling { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? controlBams - Array[File]+? controlBamsIndex + Array[File] controlBams + Array[File] controlBamsIndex String outDir = "macs2" String sampleName Boolean nomodel = false @@ -38,7 +38,7 @@ task PeakCalling { set -e macs2 callpeak \ --treatment ~{sep = ' ' inputBams} \ - ~{true="--control" false="" defined(controlBams)} ~{sep = ' ' controlBams} \ + ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ ~{true='--nomodel' false='' nomodel} @@ -57,8 +57,8 @@ task PeakCalling { parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - controlBams: {description: "Control BAM files for the input bam files.", category: "required"} - controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "common"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "common"} sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} From 
9c5ebf6bb9d32d030b783ed03f329db735a92b6f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:24 +0100 Subject: [PATCH 127/439] add umiAwareMarkDuplicate --- picard.wdl | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/picard.wdl b/picard.wdl index f75fdc32..0e189a60 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1004,3 +1004,57 @@ task RenameSample { renamedVcf: {description: "New VCF with renamed sample."} } } + +task UmiAwareMarkDuplicatesWithMateCigar { + input { + File inputBam + String outputPathBam + String outputPathMetrics + String outputPathUmiMetrics + String tempdir + Boolean dedup = true + + String memory = "10G" + Int timeMinutes = 360 + String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" ~{tempdir} + picard UmiAwareMarkDuplicatesWithMateCigar \ + I=~{inputBam} \ + O=~{outputPathBam} \ + M=~{outputPathMetrics} \ + UMI_METRICS_FILE=~{outputPathUmiMetrics} \ + TMP_DIR=~{tempdir} \ + REMOVE_DUPLICATES=~{dedup} \ + CREATE_INDEX=true \ + } + + output { + File outputBam = outputPathBam + File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputMetrics = outputPathMetrics + File outputUmiMetrics = outputPathUmiMetrics + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} + outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} + outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} + tmpDir: {description: "Temporary directory.", category: "advanced"} + memory: {description: "The amount of memory this job will 
use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} \ No newline at end of file From 010ce0ac0835f0faa1353f3f43b544c2b0ecb50c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:38 +0100 Subject: [PATCH 128/439] add annotateBamWithUmi --- fgbio.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 fgbio.wdl diff --git a/fgbio.wdl b/fgbio.wdl new file mode 100644 index 00000000..d50906d3 --- /dev/null +++ b/fgbio.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateBamWithUmis { + input { + File inputBam + File inputUmi + String outputPath + + String memory = "120G" + Int timeMinutes = 360 + String javaXmx="100G" + String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + fgbio -Xmx~{javaXmx} \ + AnnotateBamWithUmis \ + -i ~{inputBam} \ + -f ~{inputUmi} \ + -o ~{outputPath} + } + + output { + File outputBam = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file.", category: "required"} + inputUmi: {description: "The input fastq file with UMIs.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "UMI-annotated output BAM file."} + } +} From 014d43cc204fcf1f7159717c047210ca3f008c40 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 13:32:55 +0100 Subject: [PATCH 129/439] Make sure task is consistent --- picard.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/picard.wdl b/picard.wdl index 0e189a60..d8ce5ebe 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1008,11 +1008,11 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { File inputBam - String outputPathBam - String outputPathMetrics - String outputPathUmiMetrics - String tempdir - Boolean dedup = true + String outputPath + String outputPathMetrics = outputPath + ".metrics" + String outputPathUmiMetrics = outputPath + ".umi-metrics" + String tempdir = "temp" + Boolean removeDuplicates = true String memory = "10G" Int timeMinutes = 360 @@ -1024,17 +1024,17 @@ task UmiAwareMarkDuplicatesWithMateCigar { mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ I=~{inputBam} \ - O=~{outputPathBam} \ + O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ - REMOVE_DUPLICATES=~{dedup} \ + REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ } output { - File outputBam = outputPathBam - File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") File outputMetrics = outputPathMetrics File outputUmiMetrics = outputPathUmiMetrics } @@ -1048,10 +1048,11 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs inputBam: {description: "The unsorted input BAM file.", category: "required"} - outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + 
outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} - tmpDir: {description: "Temporary directory.", category: "advanced"} + removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccabed5e8c56d2f742d5aba829104fe8db00d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 14:10:45 +0100 Subject: [PATCH 130/439] Allow multiple bam inputs --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index d8ce5ebe..d2a6ca35 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1007,7 +1007,7 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { - File inputBam + Array[File] inputBams String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" @@ -1023,7 +1023,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ - I=~{inputBam} \ + INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ @@ -1047,7 +1047,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs - 
inputBam: {description: "The unsorted input BAM file.", category: "required"} + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} From 89eaf097695f6bda12a20f0d5ce993a230a8342a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:11:01 +0100 Subject: [PATCH 131/439] Add script to extract umi from read name --- umi.wdl | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 umi.wdl diff --git a/umi.wdl b/umi.wdl new file mode 100644 index 00000000..fdf764f4 --- /dev/null +++ b/umi.wdl @@ -0,0 +1,100 @@ +version 1.0 + +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task BamReadNameToUmiTag { + + # This task processes a bam file with reads that have been extracted with + # umi-tools extract. The UMI is extracted from the read name again and put + # in the bam file again with umiTag (default RX) + input { + File inputBam + String outputPath = "output.bam" + String umiTag = "RX" + + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" + } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< + python < Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi + + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) 
+ segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) + + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + CODE + >>> + + output { + File outputBam = outputBam + File outputBamIndex = outputBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} + } +} From 0a66c48bb5b75722d641d23c3421d2ca50c5ad21 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:46:00 +0100 Subject: [PATCH 132/439] Add umiTagName flag --- picard.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/picard.wdl b/picard.wdl index d2a6ca35..961364e4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1013,6 +1013,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPathUmiMetrics = outputPath + ".umi-metrics" String tempdir = "temp" Boolean removeDuplicates = true + String umiTagName = "RX" String memory = "10G" Int timeMinutes = 360 @@ -1026,6 +1027,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ + UMI_TAG_NAME=~{umiTagName} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ 
TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ @@ -1052,6 +1054,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d383b38d49cec511e9b6212dc1507e10ddc2fcec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:26:23 +0100 Subject: [PATCH 133/439] Dedent overindented code --- umi.wdl | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/umi.wdl b/umi.wdl index fdf764f4..7c435654 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,38 +37,38 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Tuple[str, str]: - id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest - underscore_index = id.rfind("_") - umi = id[underscore_index + 1:] - new_id = id[:underscore_index] - if other_parts: - return " ".join([new_id, other_parts]), umi - return new_id, umi + def split_umi_from_name(name) -> Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + 
new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi - def annotate_umis(in_file, out_file, bam_tag = "RX"): - in_bam = pysam.AlignmentFile(in_file, "rb") - out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) - for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) - segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] - out_bam.write(segment) + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) + segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) - if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") - pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> From acff4bd9fffbd5a6326b96144f2fe47c2b548a36 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:28:28 +0100 Subject: [PATCH 134/439] Also create directories --- umi.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 7c435654..360405ff 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,8 +37,9 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Date: Tue, 11 Jan 2022 12:55:10 +0100 Subject: [PATCH 135/439] Correct output files --- umi.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 360405ff..2a4bc9cf 100644 --- 
a/umi.wdl +++ b/umi.wdl @@ -75,8 +75,8 @@ task BamReadNameToUmiTag { >>> output { - File outputBam = outputBam - File outputBamIndex = outputBamIndex + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { From 091058e29c0aba1d8c412ec21cda942e7597d23c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 13:55:03 +0100 Subject: [PATCH 136/439] Update changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..fe0667e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Update parameter_meta for macs2 ++ Add a script to subtract UMI's from the read name and add them as + a BAM tag for each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Add fgbio.AnnotateBamWithUmis. ++ Add picard.UmiAwareMarkDuplicatesWithMateCigar. ++ Update parameter_meta for macs2. + Add sample position in array task. 
version 5.0.2 From 054b7a7f13891c1a85c5a4e8e596e0cfb7d5282a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 16:36:37 +0100 Subject: [PATCH 137/439] Use more conventional list unpacking for clarity --- umi.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index 2a4bc9cf..e149cafe 100644 --- a/umi.wdl +++ b/umi.wdl @@ -45,11 +45,9 @@ task BamReadNameToUmiTag { def split_umi_from_name(name) -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest + id = id_and_rest[0] + # If there was no whitespace id_and_rest will have length 1 + other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" underscore_index = id.rfind("_") umi = id[underscore_index + 1:] new_id = id[:underscore_index] From 5df62f54b036d396ad78c966e19956a47df552c3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 14 Jan 2022 13:49:16 +0100 Subject: [PATCH 138/439] Add format parameter to macs2 --- macs2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macs2.wdl b/macs2.wdl index eb71ac1d..e17d613b 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -28,6 +28,7 @@ task PeakCalling { Array[File] controlBamsIndex String outDir = "macs2" String sampleName + String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours String memory = "8G" @@ -41,6 +42,7 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ + -f ~{format} \ ~{true='--nomodel' false='' nomodel} } From f05d7cb427d00a85994391b0e2829cc704bb3314 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:08:56 +0100 Subject: [PATCH 139/439] Use set_tag call from pysam --- umi.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index e149cafe..59169685 100644 --- a/umi.wdl +++ b/umi.wdl @@ -62,8 
+62,7 @@ task BamReadNameToUmiTag { for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] + segment.set_tag("RX", umi, value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 28a2801941d6b56a64d1c413a4998ff220cd9899 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:25:50 +0100 Subject: [PATCH 140/439] Use proper encoding --- umi.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index 59169685..a32d646a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -59,10 +59,14 @@ task BamReadNameToUmiTag { in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. + encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - segment.set_tag("RX", umi, value_type="Z") + # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. + # Value type has to be a string though, otherwise pysam crashes. 
+ segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 7b2d86fef3c90983b9ca57a9aded3872756d80e3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 10:52:40 +0100 Subject: [PATCH 141/439] Set xmx value properly for UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 961364e4..46b11e51 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1015,7 +1015,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean removeDuplicates = true String umiTagName = "RX" - String memory = "10G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" } @@ -1023,7 +1024,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { command { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} - picard UmiAwareMarkDuplicatesWithMateCigar \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + UmiAwareMarkDuplicatesWithMateCigar \ INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ From 09b97388eea432a1d0b4c37fe65f5621e13e9d0b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:18:17 +0100 Subject: [PATCH 142/439] Update Picard and reevaluate use of intel inflater/defaler --- picard.wdl | 99 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/picard.wdl b/picard.wdl index 46b11e51..bf32c8ac 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -497,13 +497,15 @@ task GatherBamFiles { String outputBamPath Boolean createMd5File = false - Int? 
compressionLevel + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -513,7 +515,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - ~{"COMPRESSION_LEVEL=" + compressionLevel} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } @@ -536,7 +540,9 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -555,10 +561,14 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -566,6 +576,10 @@ task GatherVcfs { mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + CREATE_INDEX=true \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} } @@ -590,6 +604,10 @@ task GatherVcfs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + # outputs outputVcf: {description: "Multiple VCF files gathered into one file."} } @@ -601,14 +619,11 @@ task MarkDuplicates { Array[File]+ inputBams String outputBamPath String metricsPath - Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). - # Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel - # deflater is updated! - Boolean useJdkDeflater = true + + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. @@ -622,7 +637,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -638,6 +653,8 @@ task MarkDuplicates { OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ @@ -668,9 +685,9 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} compressionLevel: {description: "The compression level 
at which the BAM files are written.", category: "advanced"} - createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} @@ -692,16 +709,20 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. + Boolean useJdkInflater = false # Better results for compression level 1 (much smaller). # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! - Boolean useJdkDeflater = true + # Second NOTE: No it did not change. Only the fastest algorithm with + # worse compression is wrapped in the intel GKL. Instead of using + # one of the slightly slower but better compressing alternatives from ISA-L. + # (Which are also faster than zlib.) 
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +778,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" File? noneFile } @@ -818,7 +839,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -853,13 +874,15 @@ task SortSam { Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. 
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -872,6 +895,8 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} @@ -896,7 +921,9 @@ task SortSam { sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} - compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -917,7 +944,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -967,7 +994,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1014,11 +1041,13 @@ task UmiAwareMarkDuplicatesWithMateCigar { String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" - + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" String memory = "9G" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1034,6 +1063,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -1058,6 +1090,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: 
{description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From a0933e34c55d4bed26510e0fd09fe013441898c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:38:42 +0100 Subject: [PATCH 143/439] Add option to assume sort order --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index bf32c8ac..144c7782 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" @@ -1065,7 +1066,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + ~{"ASSUME_SORT_ORDER=" + assumeSortOrder} } output { @@ -1089,6 +1091,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} + assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} From 58682093853cf6e62304d7797f3f268587187669 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 15:42:12 +0100 Subject: [PATCH 144/439] Have more records in ram --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 144c7782..e81cd4e3 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true @@ -1063,6 +1064,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ From 89a0324ab9467ab79528ce3908701d7b230b2822 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Jan 2022 12:59:00 +0100 Subject: [PATCH 145/439] increase resources GRIDSS --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b43a3837..7d6a1ebf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,10 +93,10 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 200 + Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 Int threads = 4 - Int timeMinutes = ceil(7200 / threads) + 180 + Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 99215fdd9834f39569e5672b9daf5b010a777abc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 12:56:23 +0100 Subject: [PATCH 146/439] update scripts and changelog --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ bcftools.wdl | 4 ++-- scripts | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..6c0db947 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,43 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for SnpEff. ++ Adjusted runtime settings for sambamba Markdup. ++ Added a task for sambamba Flagstat. ++ Added a task for Picard CollectWgsMetrics. ++ Added a task for Peach. 
++ Added tasks for HMFtools: + + Amber + + Cobalt + + Cuppa + + CuppaChart + + GripssApplicationKt + + GripssHardFilterApplicationKt + + HealthChecker + + Linx + + Protect + + Purple + + Sage + + VirusInterpreter ++ Added a task for VirusBreakend. ++ Added a task for GridssAnnotateVcfRepeatmasker. ++ Bumped GRIDSS version to 2.12.2. ++ Adjusted GRIDSS runtime settings. ++ Added optional inputs to GRIDSS: + + blacklistBed + + gridssProperties ++ Added a task for GRIDSS AnnotateInsertedSequence. ++ Added a task for ExtractSigPredictHRD. ++ Added a task for DeconstructSigs. ++ Added option useSoftclippingForSupplementary (default false) to + BWA mem. ++ Adjusted BWA mem runtime settings. ++ Added a task for bedtools coverage. ++ Added a task for bcftools filter. ++ Adjusted runtime settings for bcftools annotate. ++ Added optional inputs to bcftools annotate: + + inputFileIndex + + annsFileIndex + Update parameter_meta for macs2 + Add sample position in array task. diff --git a/bcftools.wdl b/bcftools.wdl index 13ce36be..88d97cd0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,8 +44,8 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - + File? 
samplesFile + Int threads = 0 String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From bf4c1a3e8ab1bbd73a8d7a3fe29a15ac8ad69153 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 16:24:09 +0100 Subject: [PATCH 147/439] adress comments --- hmftools.wdl | 108 +++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1537bce5..0b4ba6d0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -43,8 +43,8 @@ task Amber { command { AMBER -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -63,8 +63,8 @@ task Amber { File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" - File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" - File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] @@ -78,9 +78,9 @@ task 
Amber { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -102,9 +102,9 @@ task Amber { task Cobalt { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -120,8 +120,8 @@ task Cobalt { command { COBALT -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -131,9 +131,9 @@ task Cobalt { output { File version = "~{outputDir}/cobalt.version" - File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" - File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" - File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" File tumorRatioTsv = 
"~{outputDir}/~{tumorName}.cobalt.ratio.tsv" @@ -150,9 +150,9 @@ task Cobalt { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -279,7 +279,7 @@ task GripssApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" String tumorName - String normalName + String referenceName File referenceFasta File referenceFastaFai File referenceFastaDict @@ -287,8 +287,8 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "33G" - String javaXmx = "32G" + String memory = "32G" + String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } @@ -298,7 +298,7 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -383,9 +383,9 @@ task GripssHardFilterApplicationKt { task HealthChecker { input { String outputDir = "." 
- String normalName - File normalFlagstats - File normalMetrics + String referenceName + File referenceFlagstats + File referenceMetrics String tumorName File tumorFlagstats File tumorMetrics @@ -401,9 +401,9 @@ task HealthChecker { set -e mkdir -p ~{outputDir} health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -reference ~{normalName} \ - -ref_flagstat_file ~{normalFlagstats} \ - -ref_wgs_metrics_file ~{normalMetrics} \ + -reference ~{referenceName} \ + -ref_flagstat_file ~{referenceFlagstats} \ + -ref_wgs_metrics_file ~{referenceMetrics} \ -tumor ~{tumorName} \ -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ @@ -425,9 +425,9 @@ task HealthChecker { parameter_meta { outputDir: {description: "The path the output will be written to.", category:"required"} - normalName: {description: "The name of the normal sample.", category: "required"} - normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} - normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} @@ -546,11 +546,11 @@ task Protect { input { String refGenomeVersion String tumorName - String normalName + String referenceName Array[String]+ sampleDoids String outputDir = "." 
Array[File]+ serveActionability - File doidsJson + File doidJson File purplePurity File purpleQc File purpleDriverCatalogSomatic @@ -576,11 +576,11 @@ task Protect { protect -Xmx~{javaXmx} \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ - -reference_sample_id ~{normalName} \ + -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ - -doid_json ~{doidsJson} \ + -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ @@ -608,11 +608,11 @@ task Protect { parameter_meta { refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} - normalName: {description: "The name of the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} - doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} purplePurity: {description: "The purity file generated by purple.", category: "required"} purpleQc: {description: "The QC file generated by purple.", category: "required"} purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} @@ -639,7 +639,7 @@ task Protect { task 
Purple { input { - String normalName + String referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput @@ -667,7 +667,7 @@ task Purple { command { PURPLE -Xmx~{javaXmx} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -713,7 +713,7 @@ task Purple { File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File purpleVersion = "~{outputDir}/purple.version" - File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" @@ -744,7 +744,7 @@ task Purple { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} amberOutput: {description: "The output files of hmftools amber.", category: "required"} @@ -787,9 +787,9 @@ task Sage { Boolean panelOnly = false String outputPath = "./sage.vcf.gz" - String? normalName - File? normalBam - File? normalBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex Int? hotspotMinTumorQual Int? panelMinTumorQual Int? 
hotspotMaxGermlineVaf @@ -801,8 +801,8 @@ task Sage { Int threads = 4 String javaXmx = "50G" - String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) + String memory = "51G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } @@ -810,8 +810,8 @@ task Sage { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -ref_genome ~{referenceFasta} \ -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ @@ -848,9 +848,9 @@ task Sage { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceName: {description: "The name of the normal/reference sample.", category: "common"} + referenceBam: {description: "The BAM file for the normal sample.", category: "common"} + referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From fb91a02460b22501cc1c57dc381a486a29b01fbd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 27 Jan 2022 12:01:28 +0100 Subject: [PATCH 148/439] update healthchecker 
--- hmftools.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0b4ba6d0..76620e3c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -408,13 +408,16 @@ task HealthChecker { -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} + -output_dir ~{outputDir} + if test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded'; then echo 'true' > result; fi + if test -e '~{outputDir}/~{tumorName}.HealthCheckFailed'; then echo 'false' > result; fi } - output { - File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" - File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + Boolean succeeded = read_boolean("result") + File outputFile = if succeeded + then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + else "~{outputDir}/~{tumorName}.HealthCheckFailed" } runtime { From f234b0e8f46192d248e564f22bcd88912b890576 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 Jan 2022 14:42:42 +0100 Subject: [PATCH 149/439] add missing parameter_meta --- gridss.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 7d6a1ebf..d3d251a5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -146,7 +146,8 @@ task GRIDSS { gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category:
"advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b3b79f62d4a538642318c0316080f9a098ca4b48 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 14:24:45 +0100 Subject: [PATCH 150/439] add a task for Pave --- hmftools.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..1dbfd5de 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -545,6 +545,79 @@ task Linx { } } +task Pave { + input { + String outputDir = "./" + String sampleName + File vcfFile + File vcfFileIndex + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + File driverGenePanel + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int timeMinutes = 50 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biowdl/pave:v1.0" + } + + command { + set -e + mkdir -p ~{outputDir} + pave -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -vcf_file ~{vcfFile} \ + -output_dir ~{outputDir} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -driver_gene_panel ~{driverGenePanel} + } + + output { + File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputVcf}.tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + vcfFile: {description: "The input VCF file.", category: "required"} +
vcfFileIndex: {description: "The index for the input vcf file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + #The following should be in the same directory. + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Protect { input { String refGenomeVersion From 3ffa051fd2be4edb4fbc466836c9da782e68be27 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 17:04:00 +0100 Subject: [PATCH 151/439] add task for gripss 2.0 --- hmftools.wdl | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1dbfd5de..c0c835b5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -274,7 +274,79 @@ task CuppaChart { } } +task Gripss { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File knownFusionPairBedpe + File breakendPon + File breakpointPon + String referenceName + String tumorName + File vcf + File vcfIndex + String outputDir = "./" + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 50 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -ref_genome ~{referenceFasta} \ + -known_hotspot_file ~{knownFusionPairBedpe} \ + -pon_sgl_file ~{breakendPon} \ + -pon_sv_file ~{breakpointPon} \ + -reference ~{referenceName} \ + -sample ~{tumorName} \ + -vcf ~{vcf} \ + -output_dir ~{outputDir} \ + -output_id somatic + } + + output { + File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + vcf: {description: "The input VCF.", category: "required"} + vcfIndex: {description: "The index for the input VCF.", category: "required"} + outputDir: {description: "The path the output will be written to.", category:"required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss.vcf.gz" @@ -322,13 +394,15 @@ task GripssApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceName: {description: "The name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -339,6 +413,7 @@ task GripssApplicationKt { } task GripssHardFilterApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" @@ -724,6 +799,7 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf + File filteredSvVcfIndex File? fullSvVcf File? 
fullSvVcfIndex File referenceFasta From 22a880cdd2223034ebb80fcdb1006b2bd3fe81c7 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 11:52:10 +0100 Subject: [PATCH 152/439] update purple to 3.2 --- hmftools.wdl | 54 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..caafa440 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -65,8 +65,8 @@ task Amber { File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] } @@ -110,7 +110,7 @@ task Cobalt { File tumorBamIndex String outputDir = "./cobalt" File gcProfile - + Int threads = 1 String memory = "5G" String javaXmx = "4G" @@ -174,7 +174,7 @@ task Cuppa { Array[File]+ purpleOutput String sampleName Array[String]+ categories = ["DNA"] - Array[File]+ referenceData + Array[File]+ referenceData File purpleSvVcf File purpleSvVcfIndex File purpleSomaticVcf @@ -244,7 +244,7 @@ task CuppaChart { } command { - set -e + set -e mkdir -p ~{outputDir} cuppa-chart \ -sample ~{sampleName} \ @@ -429,7 +429,7 @@ task GripssHardFilterApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -490,7 +490,7 @@ task HealthChecker { output { Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + File outputFile = if 
succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } @@ -675,10 +675,9 @@ task Pave { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - #The following should be in the same directory. geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} @@ -757,7 +756,7 @@ task Protect { } parameter_meta { - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} @@ -800,41 +799,47 @@ task Purple { File germlineVcf File filteredSvVcf File filteredSvVcfIndex - File? fullSvVcf - File? fullSvVcfIndex + File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict File driverGenePanel File somaticHotspots File germlineHotspots - + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + Int threads = 1 Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" + # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" } command { PURPLE -Xmx~{javaXmx} \ -reference ~{referenceName} \ + -germline_vcf ~{germlineVcf} \ + -germline_hotspots ~{germlineHotspots} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ - -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - ~{"-sv_recovery_vcf " + fullSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ - -driver_catalog \ - -driver_gene_panel ~{driverGenePanel} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -run_drivers \ -somatic_hotspots 
~{somaticHotspots} \ - -germline_hotspots ~{germlineHotspots} \ + -driver_gene_panel ~{driverGenePanel} \ -threads ~{threads} } @@ -877,8 +882,8 @@ task Purple { File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, - purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, - purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, @@ -913,6 +918,11 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, 
`proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0d7909255421e4e7b30cfcd51e68da1530221427 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 12:41:34 +0100 Subject: [PATCH 153/439] update linx to 1.17 --- hmftools.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index caafa440..810685bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -529,8 +529,6 @@ task Linx { String outputDir = "./linx" File fragileSiteCsv File lineElementCsv - File replicationOriginsBed - File viralHostsCsv File knownFusionCsv File driverGenePanel #The following should be in the same directory. @@ -539,10 +537,10 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "5G" - String javaXmx = "4G" + String memory = "9G" + String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" } command { @@ -554,9 +552,7 @@ task Linx { -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ - -replication_origins_file ~{replicationOriginsBed} \ - -viral_hosts_file ~{viralHostsCsv} \ - -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -check_fusions \ -known_fusion_file ~{knownFusionCsv} \ -check_drivers \ @@ -598,12 +594,10 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The 
files produced by PURPLE.", category: "required"} - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} - replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} - viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} From d6bfc449dfc6979511e746a52f6fddf0e30e7853 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 12:47:06 +0100 Subject: [PATCH 154/439] Speed up CI by using conda caching and only checking changed files Squashed commit of the following: commit 7fa743cc028b8e2c86bde49244834ee13c13e95b Author: Ruben Vorderman Date: Fri Feb 11 12:34:33 2022 +0100 Add comment about activate environment commit 2de7802e03f90cd6e26b3d8287fcb0c6b8b81d11 Author: Ruben Vorderman Date: Fri Feb 11 12:26:38 2022 +0100 Invalidate cache commit 8ca394d41361acf2511249e3e29688baf0705004 Author: Ruben Vorderman Date: Fri Feb 11 12:26:27 2022 +0100 Consolidate steps commit 31d09c6f0e86d4625bfa3a6e94a7ced910c7410c Author: Ruben Vorderman 
Date: Fri Feb 11 11:27:53 2022 +0100 Use correct path for caching commit 7e1374ed323bb38d674da09d7270def4a2192d00 Author: Ruben Vorderman Date: Fri Feb 11 11:22:10 2022 +0100 Do not cache conda packages commit deffd8a0776e15a4df58a1398fcbcb8b0f1430f0 Author: Ruben Vorderman Date: Fri Feb 11 11:20:59 2022 +0100 Remove unnecessary whitespace commit 8e97bcd4dfd8ee459a23f1931465875c0a41fd49 Author: Ruben Vorderman Date: Fri Feb 11 11:20:03 2022 +0100 Remove debugging task commit 8338cd4b843245d781d7028f1f1acad45c8c7d0d Author: Ruben Vorderman Date: Fri Feb 11 11:17:18 2022 +0100 Try to change path commit 6a75baa36eee340d7a6d766c89163e960a6203b0 Author: Ruben Vorderman Date: Fri Feb 11 11:12:18 2022 +0100 Delete path line in current github env commit cbbb9fe67cb796a010c01760ca2e05986f979ced Author: Ruben Vorderman Date: Fri Feb 11 11:05:50 2022 +0100 Properly activate commit 671568b7c8d79a5141429068a32b72814110b361 Author: Ruben Vorderman Date: Fri Feb 11 10:59:46 2022 +0100 Also printenv commit 4c8945e8d5305753482538389ddc8af892f493f9 Author: Ruben Vorderman Date: Fri Feb 11 10:56:45 2022 +0100 Manual activate commit a925c53a99836e81eb0e2b21075356370906c641 Author: Ruben Vorderman Date: Fri Feb 11 10:53:10 2022 +0100 Reset cache number commit 645ed2b4504d067ea1b26a0922943ef3d5c34622 Author: Ruben Vorderman Date: Fri Feb 11 10:51:09 2022 +0100 Activate environment path commit 5852d29fb538b80f06a738677e7ae271c6c57fa3 Author: Ruben Vorderman Date: Fri Feb 11 10:31:07 2022 +0100 Proper setting for cache commit 83f14a939d662d628ca47dc7b82bbc114f164541 Author: Ruben Vorderman Date: Fri Feb 11 10:03:45 2022 +0100 List environments commit 59267fbba267c0b1726733e390ff471d7012cefa Author: Ruben Vorderman Date: Fri Feb 11 10:01:58 2022 +0100 Activate environment manually commit 0a4d2cd5644407308fcc78356a8aef55de86c0c6 Author: Ruben Vorderman Date: Fri Feb 11 09:57:32 2022 +0100 List environments commit 0bc8fa939eb35a6eb352bb58b1235efecd34056f Author: Ruben Vorderman Date: Fri Feb 
11 09:52:02 2022 +0100 Add mambaforge comment commit 719d92a0b5245be891d1b5c0eb38d8048abdc5a1 Author: Ruben Vorderman Date: Fri Feb 11 09:44:18 2022 +0100 Use normal conda, since environment is cached commit e5efbb75109f40cfa8b7b33280ec9707a31970d1 Author: Ruben Vorderman Date: Fri Feb 11 09:38:21 2022 +0100 Also cache environments.txt commit 4fa66afb6606ceeb7be577df9f20704d96fc3af0 Author: Ruben Vorderman Date: Fri Feb 11 09:34:46 2022 +0100 Check home commit 2ac42e42829141650585780d27f39d06ebaf8f75 Author: Ruben Vorderman Date: Wed Feb 9 17:00:27 2022 +0100 Add an annoying but effective manual check commit 78d88eae8cb3d1ca44709ce90bcffeb7c5786c1b Author: Ruben Vorderman Date: Wed Feb 9 16:54:29 2022 +0100 Cache correct path commit c05c94561785b1d5e198588dc210313014f3913d Author: Ruben Vorderman Date: Wed Feb 9 16:45:51 2022 +0100 Rename workflow commit 1c67f010c589c1c1fb407ac32e8ed74afdb3ddfd Author: Ruben Vorderman Date: Wed Feb 9 16:45:05 2022 +0100 Use correct quotes commit 7f9d2e559697e9d9d1f6df3514c8269612e7bcee Author: Ruben Vorderman Date: Wed Feb 9 16:42:25 2022 +0100 Only check changed wdl files commit 0e2a15b38e206fdb96d2d8b225999d6e5c9e6e73 Author: Ruben Vorderman Date: Wed Feb 9 16:34:35 2022 +0100 remove v parameter commit 89348dde8a84cd1d935999255c64428c99db7042 Author: Ruben Vorderman Date: Wed Feb 9 16:19:02 2022 +0100 Remove newline commit 752b8cb4a8407908348d8424fdc4b89d3219fdad Author: Ruben Vorderman Date: Wed Feb 9 16:17:33 2022 +0100 Git fetch develop commit 9216a3f846268ba00d0fe922055536b06dc975b3 Author: Ruben Vorderman Date: Wed Feb 9 15:53:13 2022 +0100 Specifically check origin commit b54c140de4fc0bf31d7c95384831aedb253f35a3 Author: Ruben Vorderman Date: Wed Feb 9 15:44:50 2022 +0100 Only check files that are different from the base with womtool validate commit d963818753272aa18311d3d29276c3db6241e85d Author: Ruben Vorderman Date: Wed Feb 9 15:33:50 2022 +0100 Correctly use data commit 8113bfdd2e1feda6047e13da79885a3131c000e6 Author: 
Ruben Vorderman Date: Wed Feb 9 15:32:48 2022 +0100 Set correct env cache param commit 4f7af2ed0365887be9147954290c4b807673afdd Author: Ruben Vorderman Date: Wed Feb 9 15:30:23 2022 +0100 Add lint-environment commit b026b5a8a77ea131b229a50cb28e0d301915cfb8 Author: Ruben Vorderman Date: Wed Feb 9 15:28:50 2022 +0100 Use mamba env update commit 41fda1a9f52d56578a76f8bf185db86da2128a0e Author: Ruben Vorderman Date: Wed Feb 9 15:25:21 2022 +0100 Use cache commit fd1a64261bea956b6b31a26f5eaa38ce4a63121c Author: Ruben Vorderman Date: Wed Feb 9 14:31:17 2022 +0100 Add missing done statement commit 4a64eb43535f48e0558ba6c5dc408178784ef207 Merge: a36a227 f234b0e Author: Ruben Vorderman Date: Wed Feb 9 14:23:48 2022 +0100 Merge branch 'develop' into BIOWDL-583 commit a36a2274116732bc8e3229a267fe35ee4d61e7da Author: Ruben Vorderman Date: Wed Feb 9 14:23:26 2022 +0100 Implement all checks in lint.yml directly commit 391bb0de9619e75293599a1be1d24322fd466f4c Author: Ruben Vorderman Date: Wed Feb 9 14:11:33 2022 +0100 Use a separate lint file commit 832a131cee403ec0ac7d983d6e82fd567ce1b246 Author: Ruben Vorderman Date: Tue Dec 14 16:32:30 2021 +0100 Use mamba-forge and mamba to install dependencies --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/lint-environment.yml | 9 ++++ .github/workflows/ci.yml | 30 ----------- .github/workflows/lint.yml | 93 ++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 .github/lint-environment.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/lint.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3b4ec9ac..372071ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,3 +2,4 @@ - [ ] Pull request details were added to CHANGELOG.md. - [ ] Documentation was updated (if required). - [ ] `parameter_meta` was added/updated (if required). 
+- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 00000000..63b538fc --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 78566111..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Continuous integration - -on: - pull_request: - paths_ignore: - - "docs/**" - -defaults: - run: - # This is needed for miniconda, see: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: bash -l {0} - -jobs: - lint: - runs-on: ubuntu-latest - name: Womtool validate and submodule up to date. - steps: - - uses: actions/checkout@v2.3.4 - with: - submodules: recursive - - name: install miniconda - uses: conda-incubator/setup-miniconda@v2.0.1 - with: - channels: conda-forge,bioconda,defaults - # Conda-incubator uses 'test' environment by default. 
- - name: install requirements - run: conda install -n test cromwell miniwdl wdl-aid - - name: run linting - run: bash scripts/biowdl_lint.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e6edbbab --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + uses: actions/cache@v2.1.7 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. + # Activation is therefore done separately. 
+ + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate checks very slowly, so this saves a lot of time. + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done + " + - name: run miniwdl check + run: bash -c 'miniwdl check $(git ls-files *.wdl)' + + - name: Check copyright headers + run: | + bash -c ' + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done + ' + - name: Check parameter_meta for inputs + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done + " From 54337a3c99596e48149d0d2522cc79c0a7b379e9 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 13:38:34 +0100 Subject: [PATCH 155/439] update peach to 1.5 --- peach.wdl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/peach.wdl b/peach.wdl index af44daec..6a5770f4 100644 --- a/peach.wdl +++ b/peach.wdl @@ -22,7 +22,6 @@ version 1.0 task Peach { input { - File transcriptTsv File germlineVcf File germlineVcfIndex String tumorName @@ -31,28 +30,26 @@ task Peach { File panelJson String memory = 
"2G" - String dockerImage = "quay.io/biowdl/peach:v1.0" + String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } command { + set -e + mkdir -p ~{outputDir} peach \ - --recreate_bed \ - --transcript_tsv ~{transcriptTsv} \ - ~{germlineVcf} \ - ~{tumorName} \ - ~{normalName} \ - 1.0 \ - ~{outputDir} \ - ~{panelJson} \ - vcftools + -vcf ~{germlineVcf} \ + --sample_t_id ~{tumorName} \ + --sample_r_id ~{normalName} \ + --tool_version 1.5 \ + --outputDir ~{outputDir} \ + --panel } output { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" - File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] + Array[File] outputs = [callsTsv, genotypeTsv] } runtime { @@ -62,7 +59,6 @@ task Peach { } parameter_meta { - transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} @@ -74,4 +70,4 @@ task Peach { memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From adb8a68ce8fff78613ee95451db821363b74353b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:42:36 +0100 Subject: [PATCH 156/439] Debug task --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e6edbbab..622e0581 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,6 +66,7 @@ jobs: # Womtool validate checks very slowly, so this saves a lot of time. run: | bash -c " + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done From 37faa1b46883bb93c6e926141d6145b3ead9fafd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:50:07 +0100 Subject: [PATCH 157/439] Use heredoc script --- .github/workflows/lint.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 622e0581..7eb6fe5d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,25 +65,25 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. 
run: | - bash -c " + bash <<- SCRIPT set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done - " + SCRIPT - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash -c ' + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - ' + SCRIPT - name: Check parameter_meta for inputs run: | - bash -c " + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr @@ -91,4 +91,4 @@ jobs: exit 1 fi done - " + SCRIPT From 7d8cadf598e9359e6ea6d9822fe63210f026acfe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:06:44 +0100 Subject: [PATCH 158/439] Use always upload cache --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7eb6fe5d..11bf7a40 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,8 @@ jobs: run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Cache conda environment - uses: actions/cache@v2.1.7 + # Use an always upload cache to prevent solving conda environment again and again on failing linting. 
+ uses: pat-s/always-upload-cache@v2.1.5 env: # Increase this value to manually invalidate the cache CACHE_NUMBER: 0 From eba0865e6865217ed34de9e04ac0f4c1b86f9435 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:11:34 +0100 Subject: [PATCH 159/439] Run stuff directly in bash --- .github/workflows/lint.yml | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 11bf7a40..61e3d99f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,30 +66,24 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. run: | - bash <<- SCRIPT - set -x - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - womtool validate $WDL_FILE - done - SCRIPT + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done - name: Check parameter_meta for inputs run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || - if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr - then - exit 1 - fi - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do 
+ wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done From e72270755a25b5259f99d6e1855bf10926a2dc5d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:14:27 +0100 Subject: [PATCH 160/439] Use set -x to better see what happens --- .github/workflows/lint.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 61e3d99f..7ef19e58 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -71,15 +71,19 @@ jobs: womtool validate $WDL_FILE done - name: run miniwdl check - run: bash -c 'miniwdl check $(git ls-files *.wdl)' + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - name: Check parameter_meta for inputs run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr From 868f3617f22d28ae6855ed8c5d75fd76c967a5db Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 14 Feb 2022 10:51:20 +0100 Subject: [PATCH 161/439] Add format parameter to parameter_meta --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index e17d613b..2afe3bbe 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -67,6 +67,6 @@ task PeakCalling { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"} } } From 0f6d75c76ed78cc1847acc732fd78ca44b2646a6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Feb 2022 17:04:49 +0100 Subject: [PATCH 162/439] fix some issues --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..27badc9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -312,8 +312,8 @@ task Gripss { output { File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" } runtime { @@ -629,7 +629,7 @@ task Pave { File referenceFasta File referenceFastaFai File referenceFastaDict - File refGenomeVersion + String refGenomeVersion File driverGenePanel #The following should be in the same directory. 
File geneDataCsv From b72d2fcff910a8a7cf3c1103f90bcf2974b75b4c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:16:17 +0100 Subject: [PATCH 163/439] fix Pave output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index b349038d..36909ee4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -652,7 +652,7 @@ task Pave { output { File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputVcf}.tbi" + File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 0554cfe785f39b9e1ebfef4a2dda7450a4ed749b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:36:33 +0100 Subject: [PATCH 164/439] fix copy paste error --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 36909ee4..c9745b57 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -651,8 +651,8 @@ task Pave { } output { - File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" + File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 92d964d52ea3d64f7f927f6b41933098c4ec3678 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 12:33:02 +0100 Subject: [PATCH 165/439] fix purple outputs --- hmftools.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c9745b57..2015c125 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -838,11 +838,13 @@ task Purple { } output { - File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File driverCatalogGermlineTsv = 
"~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" - File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purpleGermlineDeletionTsv = "~{outputDir}/~{tumorName}.purple.germline.deletion.tsv" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" File purpleQc = "~{outputDir}/~{tumorName}.purple.qc" @@ -851,10 +853,9 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" - File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" - File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File purpleVersion = "~{outputDir}/purple.version" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" @@ -863,19 +864,19 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File purpleVersion = "~{outputDir}/purple.version" + File somaticRainfallPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" - File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" - File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" - File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" - File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" - File circosGaps = "~{outputDir}/circos/gaps.txt" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" - Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + File circosGaps = "~{outputDir}/circos/gaps.txt" + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From d554e60c08dee3597680cb18d9eee67201aba5ac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:16:33 +0100 Subject: [PATCH 166/439] fix peach command --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 6a5770f4..bd8375d7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -43,7 +43,7 @@ task Peach { --sample_r_id ~{normalName} \ --tool_version 1.5 \ --outputDir ~{outputDir} \ - --panel + --panel ~{panelJson} } output { From 
54f323f52f7ac0d0fbbab1f893b5f8583d504791 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:20:54 +0100 Subject: [PATCH 167/439] fix some typos --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index bd8375d7..d1bc17f8 100644 --- a/peach.wdl +++ b/peach.wdl @@ -38,11 +38,11 @@ task Peach { set -e mkdir -p ~{outputDir} peach \ - -vcf ~{germlineVcf} \ + --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ --tool_version 1.5 \ - --outputDir ~{outputDir} \ + --outputdir ~{outputDir} \ --panel ~{panelJson} } From c675c91fbc91f932c6f5018986d025993611f8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 15:14:35 +0100 Subject: [PATCH 168/439] fix linx output and health-checker command --- hmftools.wdl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2015c125..3ab203fb 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -484,8 +484,14 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e '~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { @@ -531,6 +537,7 @@ task Linx { File lineElementCsv File knownFusionCsv File driverGenePanel + Boolean writeAllVisFusions = false #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -540,7 +547,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } command { @@ -558,7 +565,8 @@ task Linx { -check_drivers \ -driver_gene_panel ~{driverGenePanel} \ -chaining_sv_limit 0 \ - -write_vis_data + -write_vis_data \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} } output { @@ -569,7 +577,6 @@ task Linx { File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" @@ -578,9 +585,9 @@ task Linx { File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, - linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, - linxVisFusion, linxVisGeneExon, linxVisProteinDomain, - linxVisSegments, linxVisSvData, linxVersion] + linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, + linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, + linxVersion] } runtime { @@ -600,6 +607,7 @@ task Linx { lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions 
flag.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 5e821d51571d91727357e324cc9283eafce5e427 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 16:26:29 +0100 Subject: [PATCH 169/439] fix health checker output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3ab203fb..9a3bd437 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -495,7 +495,7 @@ task HealthChecker { } output { - Boolean succeeded = read_boolean("result") + Boolean succeeded = read_boolean("succeeded") File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" From f2cc5cc02fb5ed2376969ff745ce0d6741fc32ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 11:43:22 +0100 Subject: [PATCH 170/439] add LinxVisualisations --- hmftools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9a3bd437..c852b520 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -622,6 +622,61 @@ task Linx { } } +task LinxVisualisations { + input { + String outputDir = "./linx_visualisation" + String sample + String refGenomeVersion + Array[File]+ linxOutput + Boolean plotReportable = true + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 10 + String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \ + -sample ~{sample} \ + -ref_genome_version ~{refGenomeVersion} \ + -circos /usr/local/bin/circos \ + -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ + -data_out ~{outputDir}/circos \ + -plot_out ~{outputDir}/plot \ + ~{if plotReportable then "-plot_reportable" else ""} + } + + output { + + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sample: {description: "The sample's name.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + linxOutput: {description: "The directory containing the linx output.", category: "required"} + plotReportable: {description: "Equivalent to the -plot_reportable flag.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" From 8fcd2e2598fbc340abdda2b3a3d56dae04cb6bdf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:41:51 +0100 Subject: [PATCH 171/439] add linx visualisation output --- hmftools.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c852b520..c1a824c6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 10 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -647,12 +647,13 @@ task LinxVisualisations { -circos /usr/local/bin/circos \ -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ -data_out ~{outputDir}/circos \ - -plot_out ~{outputDir}/plot \ + -plot_out ~{outputDir}/plots \ ~{if plotReportable then "-plot_reportable" else ""} } output { - + Array[File] circos = glob("~{outputDir}/circos/*") + Array[File] plots = glob("~{outputDir}/plots/*" } runtime { From 97c9681b4d10a9fc5d7c2b930df9e69cba85d07c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:50:28 +0100 Subject: [PATCH 172/439] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index c1a824c6..7f739311 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -653,7 +653,7 @@ task LinxVisualisations { output { Array[File] circos = glob("~{outputDir}/circos/*") - Array[File] plots = glob("~{outputDir}/plots/*" + Array[File] plots = glob("~{outputDir}/plots/*") } runtime { From 2467174555e85c5b4cf819018afd44a8b5f24af8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 14:19:08 +0100 Subject: [PATCH 173/439] update virus-interpreter to 1.2 --- hmftools.wdl | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) 
diff --git a/hmftools.wdl b/hmftools.wdl index 7f739311..65187f44 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1099,25 +1099,29 @@ task Sage { task VirusInterpreter { input { String sampleId + File purplePurityTsv + File prupleQcFile + File tumorSampleWgsMetricsFile File virusBreakendTsv File taxonomyDbTsv - File virusInterpretationTsv - File virusBlacklistTsv + File virusReportingDbTsv String outputDir = "." String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" } command { - virus-interpreter -Xmx~{javaXmx} \ + virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{prupleQcFile} \ + -tumor_sample_wgs_metrics_file ~{tumorSampleWgsMetricsFile} \ -virus_breakend_tsv ~{virusBreakendTsv} \ -taxonomy_db_tsv ~{taxonomyDbTsv} \ - -virus_interpretation_tsv ~{virusInterpretationTsv} \ - -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -virus_reporting_db_tsv ~{virusReportingDbTsv} \ -output_dir ~{outputDir} } @@ -1133,10 +1137,12 @@ task VirusInterpreter { parameter_meta { sampleId: {description: "The name of the sample.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + prupleQcFile: {description: "The QC file produced by purple.", category: "required"} + tumorSampleWgsMetricsFile: {description: "The picard WGS metrics file for this sample.", category: "required"} virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} - virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} - virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + virusReportingDbTsv: {description: "A virus reporting tsv.", category: 
"required"} outputDir: {description: "The directory the output will be written to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From b76866a2fbe5c23961f63dfa6b68697cf3c23126 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Mar 2022 15:37:19 +0100 Subject: [PATCH 174/439] update protect to 2.0 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 65187f44..da9c6fd5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1440 #FIXME String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -777,7 +777,7 @@ task Protect { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v1.4" + String dockerImage = "quay.io/biowdl/protect:v2.0" } command { From 513e64560afa2a532a791289e5ef77a90006aa50 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 10:23:07 +0100 Subject: [PATCH 175/439] fix health-checker --- hmftools.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..27b31bca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -409,13 +409,19 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e 
'~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { - Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + Boolean succeeded = read_boolean("succeeded") + File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } From 652735023d7a71738b0ccea450e4fedd27e41830 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 12:42:00 +0100 Subject: [PATCH 176/439] update cuppa to 1.6 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index da9c6fd5..277c8dd4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -184,7 +184,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { @@ -240,7 +240,7 @@ task CuppaChart { String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { From d5294222e69c6e793ea0d13e448e67b9482e5a10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 3 Mar 2022 15:50:52 +0100 Subject: [PATCH 177/439] add orange, cupGenerateReport and (hopefully) fix sage plots --- hmftools.wdl | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 277c8dd4..75fd2d19 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,62 @@ task Cobalt { } } +task CupGenerateReport { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "5G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.6" + } + + # This script writes to the directory that the input is located in. 
+ # Giving the input directly will cause the script to write in the + # locallized input dir, which may cause issues with write permissions + # in certain execution engines or backends. We, therefore, make links + # to a working directory, and give that directory as input instead. + # We can't just use the outputDir directly. This could be an + # absolute path in which case the linking might fail due to name + # collisions. Outputs are copied to the given output dir afterwards. + command { + set -e + mkdir -p ./workdir ~{outputDir} + ln -s -t workdir ~{sep=" " cupData} + CupGenerateReport \ + ~{sampleName} \ + workdir + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.summry.png \ + ./workdir/~{sampleName}.cup.report.features.png \ + ./workdir/~{sampleName}_cup.report.pdf + } + + output { + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" + File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The sample id.", category: "required"} + cupData: {description: "The output produced by cuppa.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Cuppa { input { Array[File]+ linxOutput @@ -632,7 +688,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -678,6 +734,151 @@ task LinxVisualisations { } } +task Orange { + input { + String outputDir = "./orange" + File doidJson + Array[String] sampleDoids + String tumorName + String referenceName + File referenceMetrics + File tumorMetrics + File referenceFlagstats + File tumorFlagstats + File sageGermlineGeneCoverageTsv + File sageSomaticRefSampleBqrPlot + File sageSomaticTumorSampleBqrPlot + File purpleGeneCopyNumberTsv + File purpleGermlineDriverCatalogTsv + File purpleGermlineVariantVcf + File purpleGermlineVariantVcfIndex + Array[File]+ purplePlots + File purplePurityTsv + File purpleQcFile + File purpleSomaticDriverCatalogTsv + File purpleSomaticVariantVcf + File purpleSomaticVariantVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File linxDriverTsv + Array[File]+ linxPlots + File cuppaResultCsv + File cuppaSummaryPlot + File cuppaFeaturePlot + File chordPredictionTxt + File peachGenotypeTsv + File protectEvidenceTsv + File annotatedVirusTsv + #File pipelineVersionFile + File cohortMappingTsv + File cohortPercentilesTsv + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 1440 #FIXME + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -output_dir ~{outputDir} \ + -doid_json ~{doidJson} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ + -max_evidence_level C \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{referenceName} \ + -ref_sample_wgs_metrics_file ~{referenceMetrics} \ + 
-tumor_sample_wgs_metrics_file ~{tumorMetrics} \ + -ref_sample_flagstat_file ~{referenceFlagstats} \ + -tumor_sample_flagstat_file ~{tumorFlagstats} \ + -sage_germline_gene_coverage_tsv ~{sageGermlineGeneCoverageTsv} \ + -sage_somatic_ref_sample_bqr_plot ~{sageSomaticRefSampleBqrPlot} \ + -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ + -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -linx_driver_tsv ~{linxDriverTsv} \ + -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -cuppa_summary_plot ~{cuppaSummaryPlot} \ + -cuppa_feature_plot ~{cuppaFeaturePlot} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -peach_genotype_tsv ~{peachGenotypeTsv} \ + -protect_evidence_tsv ~{protectEvidenceTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -cohort_mapping_tsv ~{cohortMappingTsv} \ + -cohort_percentiles_tsv ~{cohortPercentilesTsv} + } + #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} + + output { + File orangeJson = "~{outputDir}/~{tumorName}.orange.json" + File orangePdf = "~{outputDir}/~{tumorName}.orange.pdf" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "common"} + doidJson: {description: "A json with the DOID (Human Disease 
Ontology) tree.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + sageGermlineGeneCoverageTsv: {description: "Gene coverage file produced by the germline sage run.", category: "required"} + sageSomaticRefSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + sageSomaticTumorSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + purpleGeneCopyNumberTsv: {description: "Copy number tsv produced by purple.", category: "required"} + purpleGermlineDriverCatalogTsv: {description: "Germline driver catalog produced by purple.", category: "required"} + purpleGermlineVariantVcf: {description: "Germline variant vcf produced by purple.", category: "required"} + purplePlots: {description: "The plots generated by purple.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + purpleQcFile: {description: "The qc file produced by purple.", category: "required"} + purpleSomaticDriverCatalogTsv: {description: "Somatic driver catalog produced by purple.", category: "required"} + purpleSomaticVariantVcf: {description: "Somatic variant vcf produced by purple.", category: "required"} + linxFusionTsv: {description: "The fusions tsv produced by linx.", category: 
"required"} + linxBreakendTsv: {description: "The breakend tsv produced by linx.", category: "required"} + linxDriverCatalogTsv: {description: "The driver catalog produced by linx.", category: "required"} + linxDriverTsv: {description: "The driver tsv produced by linx.", category: "required"} + linxPlots: {description: "The plots generated by linx.", category: "required"} + cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} + cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + chordPredictionTxt: {description: "Chord prediction results.", category: "required"} + peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} + protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} + annotatedVirusTsv: {description: "Annotated virus tsv produced by virus-interpreter.", category: "required"} + #pipelineVersionFile: {description: "", category: "required"} + cohortMappingTsv: {description: "Cohort mapping file from the HMFTools resources.", category: "required"} + cohortPercentilesTsv: {description: "Cohort percentile file from the HMFTools resources.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" @@ -1024,7 +1225,7 @@ task Sage { String javaXmx = "50G" String memory = "51G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } command { @@ -1054,8 +1255,11 @@ task Sage { output { File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... + File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" + File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" + File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" + File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" } runtime { From 960aa3cf0a713b6d7870b33c529e22b98b711aea Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Mar 2022 15:06:49 +0100 Subject: [PATCH 178/439] Slightly less records in RAM --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index e81cd4e3..436369d7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,7 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" - Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. + Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true From cf0b105cdf0a2ad7a2c1354857c281c18150a36b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 10:32:35 +0100 Subject: [PATCH 179/439] Add missing whitespace. Co-authored-by: Davy Cats --- umi.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/umi.wdl b/umi.wdl index a32d646a..0dc5c55e 100644 --- a/umi.wdl +++ b/umi.wdl @@ -34,7 +34,9 @@ task BamReadNameToUmiTag { Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< python < Date: Mon, 7 Mar 2022 12:15:51 +0100 Subject: [PATCH 180/439] Add parameter_meta for useSoftclippingforSupplementary --- bwa.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bwa.wdl b/bwa.wdl index 1cb170b7..373de628 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,6 +94,7 @@ task Mem { outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} usePostalt: {description: "Whether to use the postalt script from bwa kit."} + useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} From b070d3efbfcbd41ca3545a2eec0e5bd1a6dc2a3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 12:19:13 +0100 Subject: [PATCH 181/439] Add parameter_meta for Picard UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/picard.wdl b/picard.wdl index b6d9fadf..eea8d42f 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1160,8 +1160,10 @@ task UmiAwareMarkDuplicatesWithMateCigar { assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccfb0e0d3b3e31ad5aa08fc527ecaa46e77c589 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 7 Mar 2022 13:46:17 +0100 Subject: [PATCH 182/439] fix CupGenerateReport --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 75fd2d19..d9dea387 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -193,7 +193,7 @@ task CupGenerateReport { ln -s -t workdir ~{sep=" " cupData} CupGenerateReport \ ~{sampleName} \ - workdir + workdir/ mv -t ~{outputDir} \ ./workdir/~{sampleName}.cup.report.summry.png \ ./workdir/~{sampleName}.cup.report.features.png \ From 799811db76b369b057aa54555e08c3025c6905a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:38:40 +0100 Subject: [PATCH 183/439] fix cupGenerateReport --- hmftools.wdl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index d9dea387..2e294ecd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -195,15 +195,19 @@ task CupGenerateReport { ~{sampleName} \ workdir/ mv -t ~{outputDir} \ - ./workdir/~{sampleName}.cup.report.summry.png \ - ./workdir/~{sampleName}.cup.report.features.png \ - ./workdir/~{sampleName}_cup.report.pdf + ./workdir/~{sampleName}.cup.report.summary.png \ + ./workdir/~{sampleName}_cup_report.pdf + if [ -f ./workdir/~{sampleName}.cup.report.features.png ] + then + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.features.png + fi } output { - File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" - File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" - File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summary.png" + File? 
featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup_report.pdf" } runtime { @@ -765,7 +769,7 @@ task Orange { Array[File]+ linxPlots File cuppaResultCsv File cuppaSummaryPlot - File cuppaFeaturePlot + File? cuppaFeaturePlot File chordPredictionTxt File peachGenotypeTsv File protectEvidenceTsv @@ -812,7 +816,7 @@ task Orange { -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ - -cuppa_feature_plot ~{cuppaFeaturePlot} \ + ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \ -chord_prediction_txt ~{chordPredictionTxt} \ -peach_genotype_tsv ~{peachGenotypeTsv} \ -protect_evidence_tsv ~{protectEvidenceTsv} \ @@ -861,7 +865,7 @@ task Orange { linxPlots: {description: "The plots generated by linx.", category: "required"} cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} - cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "common"} chordPredictionTxt: {description: "Chord prediction results.", category: "required"} peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} From 5ae1f6de5c3c4efe38a792e3be1104bbacacea3b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:47:30 +0100 Subject: [PATCH 184/439] fix copy-paste error (orange docker image) --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2e294ecd..34941059 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" } command { From 54d70a6b508f4a8360ce995a4bda5f6094225826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:55:02 +0100 Subject: [PATCH 185/439] fix copy-paste error --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 34941059..5a480f93 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biowdl/orange:v1.6" } command { From 9ca13a0a999ff874d041d26c4860c8c07edbe92d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Mar 2022 10:16:59 +0100 Subject: [PATCH 186/439] Remove duplicate options for markduplicates --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index eea8d42f..3d835829 100644 --- a/picard.wdl +++ b/picard.wdl @@ -726,8 +726,6 @@ task MarkDuplicates { CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ - USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { From ac55982a7acf3c06460ae0b8ac2c394865eeaa4c Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:46:53 +0100 Subject: [PATCH 187/439] run tabix if vcf index is missing in gridss --- gridss.wdl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..92d7df1e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -116,6 +116,12 @@ task GRIDSS { ~{normalBam} \ ~{tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + + # For some reason the VCF index is sometimes missing + if [ ! 
-e ~{outputPrefix}.vcf.gz.tbi ] + then + tabix ~{outputPrefix}.vcf.gz + fi } output { From 173bb2e6547c1fa4ee20ec5da98368522e18b887 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:49:26 +0100 Subject: [PATCH 188/439] update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0db947..b028b60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. + Added a task for SnpEff. + Adjusted runtime settings for sambamba Markdup. + Added a task for sambamba Flagstat. @@ -28,7 +30,7 @@ version 5.1.0-dev + Sage + VirusInterpreter + Added a task for VirusBreakend. -+ Added a task for GridssAnnotateVcfRepeatmasker. ++ Added a task for GridssAnnotateVcfRepeatmasker. + Bumped GRIDSS version to 2.12.2. + Adjusted GRIDSS runtime settings. + Added optional inputs to GRIDSS: @@ -147,7 +149,7 @@ version 4.0.0 + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. 
From 1c02ce1ea5464c11491f9dc67802ab71cb46dbcb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 12:44:01 +0200 Subject: [PATCH 189/439] add task for sv type annotation of gridss results --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 92d7df1e..f771ebe4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,6 +79,69 @@ task AnnotateInsertedSequence { } } +task AnnotateSvType { + input { + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.svtyped.vcf" + + String memory = "32G" + String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" + Int timeMinutes = 240 + } + + # Based on https://github.com/PapenfussLab/gridss/issues/74 + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + R --vanilla << EOF + library(VariantAnnotation) + library(StructuralVariantAnnotation) + + vcf_path <- "~{gridssVcf}" + out_path <- "~{outputPath}" + + # Simple SV type classifier + simpleEventType <- function(gr) { + return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal + ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", + ifelse(strand(gr) == strand(partner(gr)), "INV", + ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL", + "DUP"))))) + } + + header <- scanVcfHeader(vcf_path) + vcf <- readVcf(vcf_path, seqinfo(header)) + gr <- breakpointRanges(vcf) + svtype <- simpleEventType(gr) + info(vcf[gr$sourceId])$SVTYPE <- svtype + writeVcf(vcf, out_path) + EOF + >>> + + output { + File vcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"} + gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} + outputPath: {description: "The path the output should be written 
to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam From 87bb3c4f2104cb3c8a020aa0abfb7f5a4faa387a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 13:32:48 +0200 Subject: [PATCH 190/439] copy paste error --- gridss.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index f771ebe4..b38f344e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -134,8 +134,6 @@ task AnnotateSvType { gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 1b4238c66c6150e57e128086d16d6939a1198406 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 14:14:33 +0200 Subject: [PATCH 191/439] typo --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index b38f344e..00705392 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,7 +79,7 @@ task AnnotateInsertedSequence { } } -task AnnotateSvType { +task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex From bd153caa313e5fad73d2716813f7eb02c36b963c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jun 2022 14:56:45 +0200 Subject: [PATCH 192/439] adjust gridss threads --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..b118af9d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -95,7 +95,7 @@ task GRIDSS { Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 - Int threads = 4 + Int threads = 16 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 873ece6f64e85bea10c28754f3260de155cc8d80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Jun 2022 14:59:35 +0200 Subject: [PATCH 193/439] adjust some runtime settings --- bedtools.wdl | 2 +- gridss.wdl | 10 +++++----- hmftools.wdl | 8 ++++---- sambamba.wdl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 1d956cab..80a281d6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -76,7 +76,7 @@ task Coverage { String outputPath = "./coverage.tsv" String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } diff --git a/gridss.wdl b/gridss.wdl index b118af9d..c1a41a25 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,9 +93,9 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 300 - Int nonJvmMemoryGb = 50 - Int threads = 16 + Int jvmHeapSizeGb = 64 + Int nonJvmMemoryGb = 10 + Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } @@ -216,9 +216,9 @@ task Virusbreakend { String outputPath = "./virusbreakend.vcf" String memory = "75G" - Int threads = 8 + Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" - Int timeMinutes = 180 + Int timeMinutes = 320 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..ef6355c4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -780,7 +780,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" } @@ -1225,9 +1225,9 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 4 - String javaXmx = "50G" - String memory = "51G" + Int threads = 32 + String javaXmx = "120G" + String memory = "121G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } diff --git a/sambamba.wdl b/sambamba.wdl index 4c2115e0..6696668a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Flagstat { Int threads = 2 String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From af5cf337f77dff48e4526e1da9ca6688a1fbe56c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 22 Jun 2022 12:48:03 +0200 Subject: [PATCH 194/439] adjust sage memory and time --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ef6355c4..6c6ef045 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,9 +1226,9 @@ task Sage { File? 
coverageBed Int threads = 32 - String javaXmx = "120G" - String memory = "121G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) + String javaXmx = "8G" + String memory = "9G" + Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 4608518f1afa3159658731aaac2dbfc32bedd8b8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jun 2022 11:09:25 +0200 Subject: [PATCH 195/439] increase sage memory --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6c6ef045..32bc24fd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,8 +1226,8 @@ task Sage { File? coverageBed Int threads = 32 - String javaXmx = "8G" - String memory = "9G" + String javaXmx = "16G" + String memory = "20G" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 01aa41d21addca2002f1269ba41e165c33e9e03e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:09:40 +0200 Subject: [PATCH 196/439] fix heredoc --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 00705392..0e8fd434 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task AnnotateSvTypes { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - R --vanilla << EOF + R --vanilla << "EOF" library(VariantAnnotation) library(StructuralVariantAnnotation) @@ -115,7 +115,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path) + writeVcf(vcf, out_path, index=T) EOF >>> From 39af0ad74c6296b2f9aa536ecb2ba123a156670e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:12:14 +0200 Subject: [PATCH 197/439] fix output name --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 0e8fd434..d0428e59 100644 --- 
a/gridss.wdl +++ b/gridss.wdl @@ -83,7 +83,7 @@ task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex - String outputPath = "./gridss.svtyped.vcf" + String outputPath = "./gridss.svtyped.vcf.bgz" String memory = "32G" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" From 4e2a09e11c36a69b84451c44bf70c50825d67746 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 15:57:39 +0200 Subject: [PATCH 198/439] detect if compressed --- gridss.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d0428e59..c12c24d6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,6 +90,8 @@ task AnnotateSvTypes { Int timeMinutes = 240 } + String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< set -e @@ -115,7 +117,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path, index=T) + writeVcf(vcf, out_path, index=~{index}) EOF >>> From 358c946dc86024324455193032d53873b8361d33 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 16:36:09 +0200 Subject: [PATCH 199/439] fix double .bgz and add index to output --- gridss.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index c12c24d6..38daa029 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,7 +90,9 @@ task AnnotateSvTypes { Int timeMinutes = 240 } - String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + String effectiveOutputPath = sub(outputPath, "\\.bgz", "") + String index = if effectiveOutputPath != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< @@ -101,7 +103,7 @@ task AnnotateSvTypes { library(VariantAnnotation) library(StructuralVariantAnnotation) vcf_path <- "~{gridssVcf}" - out_path <- "~{outputPath}" + 
out_path <- "~{effectiveOutputPath}" # Simple SV type classifier simpleEventType <- function(gr) { @@ -123,6 +125,7 @@ task AnnotateSvTypes { output { File vcf = outputPath + File? vcfIndex = outputPath + ".tbi" } runtime { From 760f89e95596cb55ef2b78c27bb61c85cadedcc2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 10:13:48 +0200 Subject: [PATCH 200/439] give bcftools sort more time --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 88d97cd0..589cddea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -187,7 +187,7 @@ task Sort { String tmpDir = "./sorting-tmp" String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 8e7ca0ce64ef97b3ba7859b245377294754edbd0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 14:07:19 +0200 Subject: [PATCH 201/439] increase memory for bcftools sort --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 589cddea..2bf1c732 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "256M" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3af704d65bf0ced2b0a76e049e1019031e2d1941 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 13:04:22 +0200 Subject: [PATCH 202/439] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71309ae8..986582dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. 
--> version 5.1.0-dev --------------------------- ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't produce a vcf index. + Add a script to subtract UMI's from the read name and add them as From 0f3cb30df3276150f6b168ebfc43ed596d9f140b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 16:10:59 +0200 Subject: [PATCH 203/439] Add GT to gridss results in AnnotateSvTypes --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 38daa029..35e41d21 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,6 +119,8 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype + # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) + geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) writeVcf(vcf, out_path, index=~{index}) EOF >>> From cbd6de84edb3776aef10e774f2d15f8c29902490 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 13:20:33 +0200 Subject: [PATCH 204/439] fix typo in star GenomeGenerate parameter_meta --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index aa1fd608..6a123c86 100644 --- a/star.wdl +++ b/star.wdl @@ -78,7 +78,7 @@ task GenomeGenerate { parameter_meta { # inputs - genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} From 9625c84b6749aa6b93f933d8a9bf307231dd73e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 
18 Jul 2022 15:12:44 +0200 Subject: [PATCH 205/439] update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986582dd..afd115c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,19 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for CupGenerateReport. ++ Updated Cuppa to version 1.6. ++ Added a task for Gripss. ++ Fixed the HealthChecker task's determination of the `succeeded` output + value. ++ Updated Linx to version 1.18. ++ Added a task for LinxVisualization. ++ Added a task for HMFtools Orange. ++ Added a task for HMFtools Pave. ++ Updated Purple to version 3.2. ++ Added plot and table outputs of Sage to task outputs. ++ Updated virus-interpreter to version 1.2. ++ Updated Peach to version 1.5. + Added a task to add SVTYPE annotations to GRIDSS results (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't From 743e4e0615aa3568f391e65b3fc064e188a6f12e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 15:35:42 +0200 Subject: [PATCH 206/439] fix linting issue --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..628e2f9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -190,7 +190,7 @@ task CupGenerateReport { command { set -e mkdir -p ./workdir ~{outputDir} - ln -s -t workdir ~{sep=" " cupData} + ln -s -t workdir ~{cupData} CupGenerateReport \ ~{sampleName} \ workdir/ From e996b7930959027c31a1f7a2fd4683692a13a8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:00:49 +0200 Subject: [PATCH 207/439] increase time for cobalt --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 32bc24fd..a59b3897 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "5G" 
String javaXmx = "4G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } From e43bf3e4364a919cd3b380c58bb347d6be3a8069 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:38:48 +0200 Subject: [PATCH 208/439] update changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index afd115c8..f750b212 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bedtools coverage's timeMinutes now defaults to `320`. ++ Gridss' runtime attribute defaults were changed to: + + jvmHeapSizeGb: `64` + + nonJvmMemoryGb: `10` + + threads: `12` ++ Virusbreakend's runtime attribute defaults were changed to: + + threads: `12` + + timeMinutes: `320` ++ Cobalt's timeMinutes now defaults to `480`. ++ Orange's timeMinutes now defaults to 10. ++ Sage's runtime attributes were changed to: + + threads: `32` + + javaXmx: `"16G"` + + memory: `"20G"` + + timeMinutes: `720` ++ Sambamba's runtimeMinutes nor defaults to `320`. + Added a task for CupGenerateReport. + Updated Cuppa to version 1.6. + Added a task for Gripss. From 24cc6213026dbe1de017ebeabc2de7fbfad912ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Aug 2022 11:11:48 +0200 Subject: [PATCH 209/439] make purple's somaticRainfallPlot output optional --- CHANGELOG.md | 2 ++ hmftools.wdl | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f750b212..be0e5a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Purple's `somaticRainfallPlot` output is now optional and included in + the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
+ Gridss' runtime attribute defaults were changed to: + jvmHeapSizeGb: `64` diff --git a/hmftools.wdl b/hmftools.wdl index 1542bdfc..f878181a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1133,7 +1133,7 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File? somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -1150,8 +1150,8 @@ task Purple { purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] - Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot] + Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 8993b5c662428a0bcdc5d2fd4806812b061db529 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:25:59 +0200 Subject: [PATCH 210/439] Use gibibytes instead of gigabytes --- CPAT.wdl | 4 +-- bam2fastx.wdl | 4 +-- bcftools.wdl | 10 +++---- bedtools.wdl | 16 +++++------ biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa-mem2.wdl | 4 +-- bwa.wdl | 6 ++-- ccs.wdl | 2 +- centrifuge.wdl | 10 +++---- chunked-scatter.wdl | 4 +-- clever.wdl | 4 +-- 
collect-columns.wdl | 2 +- common.wdl | 20 +++++++------- cutadapt.wdl | 2 +- deconstructsigs.wdl | 2 +- deepvariant.wdl | 2 +- delly.wdl | 2 +- duphold.wdl | 2 +- extractSigPredictHRD.wdl | 2 +- fastqc.wdl | 2 +- fastqsplitter.wdl | 2 +- fgbio.wdl | 2 +- flash.wdl | 2 +- gatk.wdl | 56 ++++++++++++++++++------------------- gffcompare.wdl | 4 +-- gffread.wdl | 4 +-- gridss.wdl | 10 +++---- hisat2.wdl | 2 +- hmftools.wdl | 34 +++++++++++------------ htseq.wdl | 4 +-- isoseq3.wdl | 2 +- lima.wdl | 2 +- macs2.wdl | 2 +- manta.wdl | 4 +-- minimap2.wdl | 4 +-- multiqc.wdl | 6 ++-- nanopack.wdl | 4 +-- pacbio.wdl | 4 +-- pbbam.wdl | 2 +- pbmm2.wdl | 2 +- peach.wdl | 2 +- picard.wdl | 60 ++++++++++++++++++++-------------------- prepareShiny.wdl | 4 +-- rtg.wdl | 8 +++--- sambamba.wdl | 10 +++---- samtools.wdl | 34 +++++++++++------------ scripts | 2 +- smoove.wdl | 2 +- snpeff.wdl | 2 +- somaticseq.wdl | 10 +++---- spades.wdl | 2 +- star.wdl | 10 +++---- strelka.wdl | 4 +-- stringtie.wdl | 4 +-- survivor.wdl | 2 +- talon.wdl | 20 +++++++------- transcriptclean.wdl | 6 ++-- umi-tools.wdl | 6 ++-- umi.wdl | 4 +-- unicycler.wdl | 2 +- vardict.wdl | 2 +- vt.wdl | 2 +- whatshap.wdl | 6 ++-- wisestork.wdl | 8 +++--- 65 files changed, 234 insertions(+), 234 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index e6cef3ea..b96ea0d7 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,8 +34,8 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons - String memory = "4G" - Int timeMinutes = 10 + ceil(size(gene, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 10 + ceil(size(gene, "GiB") * 30) String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..62827fd9 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -30,7 +30,7 @@ task Bam2Fasta { String? 
seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } @@ -98,7 +98,7 @@ task Bam2Fastq { String? seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } diff --git a/bcftools.wdl b/bcftools.wdl index 2bf1c732..726d2e37 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -138,7 +138,7 @@ task Filter { String? softFilter String outputPath = "./filtered.vcf.gz" - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(vcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -261,7 +261,7 @@ task Stats { String? userTsTv Int threads = 0 - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -350,7 +350,7 @@ task View { String? exclude String? 
include - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bedtools.wdl b/bedtools.wdl index 80a281d6..fe18ede6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -26,7 +26,7 @@ task Complement { File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" + String memory = "~{512 + ceil(size([inputBed, faidx], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -75,7 +75,7 @@ task Coverage { File? bIndex String outputPath = "./coverage.tsv" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } @@ -120,7 +120,7 @@ task Merge { File inputBed String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(inputBed, "M"))}M" + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -159,7 +159,7 @@ task MergeBedFiles { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(bedFiles, "M"))}M" + String memory = "~{512 + ceil(size(bedFiles, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -207,8 +207,8 @@ task Sort { File? genome File? faidx - String memory = "~{512 + ceil(size(inputBed, "M"))}M" - Int timeMinutes = 1 + ceil(size(inputBed, "G")) + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -267,8 +267,8 @@ task Intersect { File? 
faidx # Giving a faidx file will set the sorted option. - String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" - Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biowdl.wdl b/biowdl.wdl index dead8303..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } diff --git a/bowtie.wdl b/bowtie.wdl index 87210dcd..7e817594 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String picardXmx = "4G" Int threads = 1 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(indexFiles, "GiB"))}GiB" Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 4566e68c..b3db0ad1 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -36,7 +36,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "GiB") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } @@ -84,7 +84,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. 
# These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/bwa.wdl b/bwa.wdl index 373de628..d4f4495a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "GiB") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "GiB") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. @@ -81,7 +81,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/ccs.wdl b/ccs.wdl index 29f1a7f9..27db15ab 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -42,7 +42,7 @@ task CCS { String? 
chunkString Int threads = 2 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } diff --git a/centrifuge.wdl b/centrifuge.wdl index 07dc7f85..757af239 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -36,7 +36,7 @@ task Build { File? sizeTable Int threads = 5 - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -109,7 +109,7 @@ task Classify { String? excludeTaxIDs Int threads = 4 - String memory = "16G" + String memory = "16GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -186,7 +186,7 @@ task Inspect { Int? across - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -245,7 +245,7 @@ task KReport { Int? minimumScore Int? minimumLength - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -303,7 +303,7 @@ task KTimportTaxonomy { File inputFile String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 66954c36..af24b139 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } @@ -84,7 +84,7 @@ task ScatterRegions { Int? 
scatterSize - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } diff --git a/clever.wdl b/clever.wdl index 186be514..791a0ba1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,7 +34,7 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,7 +94,7 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55G" + String memory = "55GiB" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } diff --git a/collect-columns.wdl b/collect-columns.wdl index 3d65c7e7..03ccb6f7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,7 +62,7 @@ task CollectColumns { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/common.wdl b/common.wdl index 1e4fc8cb..1ce2895f 100644 --- a/common.wdl +++ b/common.wdl @@ -25,7 +25,7 @@ task AppendToStringArray { Array[String] array String string - String memory = "1G" + String memory = "1GiB" } command { @@ -51,7 +51,7 @@ task CheckFileMD5 { # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -75,7 +75,7 @@ task ConcatenateTextFiles { Boolean unzip = false Boolean zip = false - String memory = "1G" + String memory = "1GiB" } # When input and output is both compressed decompression is not needed. 
@@ -104,7 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -132,7 +132,7 @@ task CreateLink { String inputFile String outputPath - String memory = "1G" + String memory = "1GiB" } command { @@ -170,7 +170,7 @@ task GetSamplePositionInArray { runtime { # 4 gigs of memory to be able to build the docker image in singularity. - memory: "4G" + memory: "4GiB" docker: dockerImage timeMinutes: 5 } @@ -190,7 +190,7 @@ task MapMd5 { input { Map[String,String] map - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -214,7 +214,7 @@ task StringArrayMd5 { input { Array[String] stringArray - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -238,7 +238,7 @@ task TextToFile { String text String outputFile = "out.txt" - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -274,7 +274,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" diff --git a/cutadapt.wdl b/cutadapt.wdl index b49a95d4..9a67692c 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? 
noZeroCap Int cores = 4 - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl index ef47e3e3..c44bf9c0 100644 --- a/deconstructsigs.wdl +++ b/deconstructsigs.wdl @@ -27,7 +27,7 @@ task DeconstructSigs { String outputPath = "./signatures.rds" Int timeMinutes = 15 - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" } diff --git a/deepvariant.wdl b/deepvariant.wdl index 28aee813..25d05bd9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -37,7 +37,7 @@ task RunDeepVariant { String? sampleName Boolean? VCFStatsReport = true - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5000 String dockerImage = "google/deepvariant:1.0.0" } diff --git a/delly.wdl b/delly.wdl index bf00ed36..7333c5ff 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File referenceFastaFai String outputPath = "./delly/delly.bcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } diff --git a/duphold.wdl b/duphold.wdl index 80fe31d2..0426da56 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -30,7 +30,7 @@ task Duphold { String sample String outputPath = "./duphold.vcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 2b5d9781..1520b608 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,7 +30,7 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git 
a/fastqc.wdl b/fastqc.wdl index 3a07db4e..d821e531 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -45,7 +45,7 @@ task Fastqc { # weird edge case fastq's. String javaXmx="1750M" Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index 25a50954..4a02697c 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -63,7 +63,7 @@ task Fastqsplitter { runtime { cpu: cores - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } diff --git a/fgbio.wdl b/fgbio.wdl index d50906d3..15fb0ea4 100644 --- a/fgbio.wdl +++ b/fgbio.wdl @@ -26,7 +26,7 @@ task AnnotateBamWithUmis { File inputUmi String outputPath - String memory = "120G" + String memory = "120GiB" Int timeMinutes = 360 String javaXmx="100G" String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" diff --git a/flash.wdl b/flash.wdl index c4554c50..7b50e0d7 100644 --- a/flash.wdl +++ b/flash.wdl @@ -34,7 +34,7 @@ task Flash { Int? maxOverlap Int threads = 2 - String memory = "2G" + String memory = "2GiB" } command { diff --git a/gatk.wdl b/gatk.wdl index 5cf7c673..0b93efe6 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -129,7 +129,7 @@ task ApplyBQSR { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -197,7 +197,7 @@ task BaseRecalibrator { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -232,7 +232,7 @@ task CalculateContamination { File? 
normalPileups String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -279,7 +279,7 @@ task CallCopyRatioSegments { File copyRatioSegments String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -332,7 +332,7 @@ task CollectAllelicCounts { File? commonVariantSitesIndex String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -390,7 +390,7 @@ task CollectReadCounts { String intervalMergingRule = "OVERLAPPING_ONLY" String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -449,7 +449,7 @@ task CombineGVCFs { File referenceFastaFai String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -509,7 +509,7 @@ task CombineVariants { String outputPath String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -579,7 +579,7 @@ task CreateReadCountPanelOfNormals { File? annotatedIntervals String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 5 # The biocontainer causes a spark related error for some reason. String dockerImage = "broadinstitute/gatk:4.1.8.0" @@ -629,7 +629,7 @@ task DenoiseReadCounts { File? 
annotatedIntervals String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -690,7 +690,7 @@ task FilterMutectCalls { File? artifactPriors String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -775,7 +775,7 @@ task GatherBqsrReports { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -805,7 +805,7 @@ task GenomicsDBImport { String? tmpDir String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -866,7 +866,7 @@ task GenotypeGVCFs { File? pedigree String javaXmx = "6G" - String memory = "7G" + String memory = "7GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -932,7 +932,7 @@ task GetPileupSummaries { String outputPrefix String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1032,7 +1032,7 @@ task HaplotypeCaller { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -1073,7 +1073,7 @@ task LearnReadOrientationModel { Array[File]+ f1r2TarGz String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1114,7 +1114,7 @@ task MergeStats { Array[File]+ stats String javaXmx = "14G" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1162,7 +1162,7 @@ task ModelSegments { File? normalAllelicCounts String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1250,7 +1250,7 @@ task MuTect2 { File? panelOfNormalsIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1325,7 +1325,7 @@ task PlotDenoisedCopyRatios { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1393,7 +1393,7 @@ task PlotModeledSegments { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1454,7 +1454,7 @@ task PreprocessIntervals { File? 
intervals String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1516,7 +1516,7 @@ task SelectVariants { String? selectTypeToInclude String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1576,7 +1576,7 @@ task SplitNCigarReads { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1645,7 +1645,7 @@ task VariantEval { File? dbsnpVCFIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1722,7 +1722,7 @@ task VariantFiltration { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index d06602bc..fe1db0a8 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,8 +46,8 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? namePrefix - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "GiB") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. 
diff --git a/gffread.wdl b/gffread.wdl index a04540f5..26a2773c 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,8 +32,8 @@ task GffRead { String? proteinFastaPath String? filteredGffPath - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGff, "GiB") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/gridss.wdl b/gridss.wdl index add3c08f..cfe53751 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -85,7 +85,7 @@ task AnnotateSvTypes { File gridssVcfIndex String outputPath = "./gridss.svtyped.vcf.bgz" - String memory = "32G" + String memory = "32GiB" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" Int timeMinutes = 240 } @@ -201,7 +201,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -239,7 +239,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "25G" + String memory = "25GiB" Int threads = 8 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 @@ -289,7 +289,7 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75G" + String memory = "75GiB" Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 320 diff --git a/hisat2.wdl b/hisat2.wdl index a2c0777c..50fabc9d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -82,7 +82,7 @@ task Hisat2 { runtime { cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: 
"~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index f878181a..26ab4e4a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70G" + String memory = "70GiB" String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" @@ -174,7 +174,7 @@ task CupGenerateReport { File cupData String outputDir = "./cuppa" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -242,7 +242,7 @@ task Cuppa { String outputDir = "./cuppa" String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -298,7 +298,7 @@ task CuppaChart { File cupData String outputDir = "./cuppa" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -348,7 +348,7 @@ task Gripss { File vcfIndex String outputDir = "./" - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" @@ -419,7 +419,7 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "32G" + String memory = "32GiB" String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -478,7 +478,7 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String 
dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -527,7 +527,7 @@ task HealthChecker { Array[File]+ purpleOutput String javaXmx = "2G" - String memory = "1G" + String memory = "3GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -690,7 +690,7 @@ task LinxVisualisations { Array[File]+ linxOutput Boolean plotReportable = true - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -778,7 +778,7 @@ task Orange { File cohortMappingTsv File cohortPercentilesTsv - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" @@ -902,7 +902,7 @@ task Pave { Int timeMinutes = 50 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/pave:v1.0" } @@ -979,7 +979,7 @@ task Protect { File chordPrediction File annotatedVirus - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biowdl/protect:v2.0" @@ -1078,7 +1078,7 @@ task Purple { Int threads = 1 Int timeMinutes = 30 - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" @@ -1227,7 +1227,7 @@ task Sage { Int threads = 32 String javaXmx = "16G" - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } @@ -1315,7 +1315,7 @@ 
task VirusInterpreter { File virusReportingDbTsv String outputDir = "." - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" diff --git a/htseq.wdl b/htseq.wdl index 76d3bb83..92bc4423 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,8 +33,8 @@ task HTSeqCount { String? idattr Int nprocesses = 1 - String memory = "8G" - Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME + String memory = "8GiB" + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "GiB") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index aacbfc60..77f19f80 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { String outputNamePrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } diff --git a/lima.wdl b/lima.wdl index 6b87ad4f..eece2b3f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -49,7 +49,7 @@ task Lima { String outputPrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } diff --git a/macs2.wdl b/macs2.wdl index 2afe3bbe..e6a011ad 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -31,7 +31,7 @@ task PeakCalling { String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } diff --git a/manta.wdl b/manta.wdl index 1c949af2..6804f304 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,7 +60,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } @@ -138,7 +138,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage 
time_minutes: timeMinutes } diff --git a/minimap2.wdl b/minimap2.wdl index 50ff4db3..96cc7734 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -31,7 +31,7 @@ task Indexing { Int? splitIndex Int cores = 1 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -98,7 +98,7 @@ task Mapping { String? howToFindGTAG Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a1662937..21fc8a7d 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,11 +57,11 @@ task MultiQC { String? clConfig String? memory - Int timeMinutes = 10 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } - Int memoryGb = 2 + ceil(size(reports, "G")) + Int memoryGb = 2 + ceil(size(reports, "GiB")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -139,7 +139,7 @@ task MultiQC { } runtime { - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/nanopack.wdl b/nanopack.wdl index e4c94a43..bd3f433e 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -40,7 +40,7 @@ task NanoPlot { String? readType Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } @@ -130,7 +130,7 @@ task NanoQc { Int? 
minLength - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" } diff --git a/pacbio.wdl b/pacbio.wdl index b21c69bc..dcf0f69e 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -25,7 +25,7 @@ task mergePacBio { Array[File]+ reports String outputPathMergedReport - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } @@ -62,7 +62,7 @@ task ccsChunks { input { Int chunkCount - String memory = "4G" + String memory = "4GiB" String dockerImage = "python:3.7-slim" } diff --git a/pbbam.wdl b/pbbam.wdl index ae64b87c..d5cafed6 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -26,7 +26,7 @@ task Index { String? outputBamPath - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" } diff --git a/pbmm2.wdl b/pbmm2.wdl index 5fda1c87..ea7c05df 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -29,7 +29,7 @@ task Mapping { File queryFile Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } diff --git a/peach.wdl b/peach.wdl index d1bc17f8..7da029d0 100644 --- a/peach.wdl +++ b/peach.wdl @@ -29,7 +29,7 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } diff --git a/picard.wdl b/picard.wdl index 3d835829..f762ecdd 100644 --- a/picard.wdl +++ b/picard.wdl @@ -27,7 +27,7 @@ task BedToIntervalList { String outputPath = "regions.interval_list" String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -88,7 +88,7 @@ task CollectHsMetrics { Int memoryMb = javaXmxMb + 512 # 
Additional * 2 because picard multiple metrics reads the # reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -109,7 +109,7 @@ task CollectHsMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -157,7 +157,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -223,7 +223,7 @@ task CollectMultipleMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -281,9 +281,9 @@ task CollectRnaSeqMetrics { String strandSpecificity = "NONE" String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -340,8 +340,8 @@ task CollectTargetedPcrMetrics { String basename String javaXmx = "3G" - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -402,7 +402,7 @@ task CollectVariantCallingMetrics { String basename String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -516,7 +516,7 @@ task CreateSequenceDictionary { String outputDir String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -568,7 +568,7 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -593,7 +593,7 @@ task GatherBamFiles { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -630,8 +630,8 @@ task GatherVcfs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -700,7 +700,7 @@ task MarkDuplicates { Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -736,7 +736,7 @@ task MarkDuplicates { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -782,8 +782,8 @@ task MergeVCFs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -838,7 +838,7 @@ task SamToFastq { Boolean paired = true String javaXmx = "16G" # High memory default to avoid crashes. 
- String memory = "17G" + String memory = "17GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" @@ -900,7 +900,7 @@ task ScatterIntervalList { Int scatter_count String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -943,7 +943,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -971,7 +971,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{1 + XmxGb}G" + memory: "~{1 + XmxGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -1004,8 +1004,8 @@ task SortVcf { File? 
dict String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1054,8 +1054,8 @@ task RenameSample { String newSampleName String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1109,7 +1109,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean useJdkInflater = false Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d669e2d1..28910743 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,7 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } @@ -67,7 +67,7 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } diff --git a/rtg.wdl b/rtg.wdl index 0e86ce3f..3e9dab9b 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,8 +27,8 @@ task Format { String outputPath = "seq_data.sdf" String rtgMem = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -85,8 +85,8 @@ task VcfEval { String rtgMem = "8G" Int threads = 1 # Tool default is number of cores in the system 😱. - String memory = "9G" - Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size([baseline, calls], "GiB") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/sambamba.wdl b/sambamba.wdl index 6696668a..be347f94 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -27,7 +27,7 @@ task Flagstat { String outputPath = "./flagstat.txt" Int threads = 2 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -113,7 +113,7 @@ task Markdup { runtime { cpu: threads - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -149,7 +149,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -177,7 +177,7 @@ task Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } diff --git a/samtools.wdl b/samtools.wdl index 81b6c17d..e1b08173 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,8 +26,8 @@ task BgzipAndIndex { String outputDir String type = "vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -71,7 +71,7 @@ task Faidx { File inputFile String outputDir - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -119,7 +119,7 @@ task Fastq { Int? 
compressionLevel Int threads = 1 - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -183,8 +183,8 @@ task FilterShortReadsBam { File bamFile String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -229,7 +229,7 @@ task Flagstat { File inputBam String outputPath - String memory = "256M" # Only 40.5 MiB used for 150G bam file. + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -269,8 +269,8 @@ task Index { String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -321,7 +321,7 @@ task Markdup { File inputBam String outputBamPath - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -359,8 +359,8 @@ task Merge { Boolean force = true Int threads = 1 - String memory = "4G" - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -415,7 +415,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -444,7 +444,7 @@ task 
Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -473,7 +473,7 @@ task Tabix { String outputFilePath = "indexed.vcf.gz" String type = "vcf" - Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -526,8 +526,8 @@ task View { Int? MAPQthreshold Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a1ac38b 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -29,7 +29,7 @@ task Call { String sample String outputDir = "./smoove" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" } diff --git a/snpeff.wdl b/snpeff.wdl index 4a3640c7..0f14e5b5 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,7 +36,7 @@ task SnpEff { Boolean noShiftHgvs = false Int? upDownStreamLen - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" diff --git a/somaticseq.wdl b/somaticseq.wdl index 63f8362e..7656d086 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,7 +47,7 @@ task ParallelPaired { File? strelkaSNV File? strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -162,7 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? 
strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -270,7 +270,7 @@ task ParallelSingle { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -362,7 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -441,7 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } diff --git a/spades.wdl b/spades.wdl index 3975dd32..d717ab28 100644 --- a/spades.wdl +++ b/spades.wdl @@ -100,6 +100,6 @@ task Spades { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" } } diff --git a/star.wdl b/star.wdl index 6a123c86..88d3c838 100644 --- a/star.wdl +++ b/star.wdl @@ -29,8 +29,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "32G" - Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) + String memory = "32GiB" + Int timeMinutes = ceil(size(referenceFasta, "GiB") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -130,12 +130,12 @@ task Star { Int runThreadN = 4 String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "GiB")) + ceil(size(flatten([inputR1, inputR2]), "GiB") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + Int memoryGb = 1 + ceil(size(indexFiles, "GiB") * 1.3) # For some reason doing above calculation inside a string does not work. # So we solve it with an optional memory string and using select_first # in the runtime section. @@ -172,7 +172,7 @@ task Star { runtime { cpu: runThreadN - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/strelka.wdl b/strelka.wdl index be08e386..39afe172 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -63,7 +63,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -139,7 +139,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/stringtie.wdl b/stringtie.wdl index 9c2f3cfc..fbe7e442 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -34,7 +34,7 @@ task Stringtie { Float? minimumCoverage Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } @@ -102,7 +102,7 @@ task Merge { Float? minimumIsoformFraction String? 
label - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } diff --git a/survivor.wdl b/survivor.wdl index de232405..b233fb52 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -31,7 +31,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" - String memory = "24G" + String memory = "24GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } diff --git a/talon.wdl b/talon.wdl index 61f5eb4a..2f93e36b 100644 --- a/talon.wdl +++ b/talon.wdl @@ -30,7 +30,7 @@ task CreateAbundanceFileFromDatabase { File? whitelistFile File? datasetsFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -86,7 +86,7 @@ task CreateGtfFromDatabase { File? whitelistFile File? datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -144,7 +144,7 @@ task FilterTalonTranscripts { File? datasetsFile Int? minDatasets - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -200,7 +200,7 @@ task GetReadAnnotations { File? 
datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -248,7 +248,7 @@ task GetSpliceJunctions { String runMode = "intron" String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -302,7 +302,7 @@ task InitializeTalonDatabase { Int cutOff3p = 300 String outputPrefix - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -360,7 +360,7 @@ task LabelReads { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -413,7 +413,7 @@ task ReformatGtf { input { File gtfFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -454,7 +454,7 @@ task SummarizeDatasets { File? datasetGroupsCsv - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -506,7 +506,7 @@ task Talon { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index efdd95f4..8607a7a3 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -27,7 +27,7 @@ task GetSJsFromGtf { String outputPrefix Int minIntronSize = 21 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -72,7 +72,7 @@ task GetTranscriptCleanStats { File inputSam String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -128,7 +128,7 @@ task TranscriptClean { File? 
variantFile Int cores = 1 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } diff --git a/umi-tools.wdl b/umi-tools.wdl index b79817c2..d8d17c48 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,7 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -87,8 +87,8 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" - Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) + String memory = "25GiB" + Int timeMinutes = 30 + ceil(size(inputBam, "GiB") * 30) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } diff --git a/umi.wdl b/umi.wdl index 0dc5c55e..e7f01fc2 100644 --- a/umi.wdl +++ b/umi.wdl @@ -30,8 +30,8 @@ task BamReadNameToUmiTag { String outputPath = "output.bam" String umiTag = "RX" - String memory = "2G" - Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } diff --git a/unicycler.wdl b/unicycler.wdl index 938d0c7e..d83db3ca 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -66,7 +66,7 @@ task Unicycler { String? 
lowScore Int threads = 1 - String memory = "4G" + String memory = "4GiB" } command { diff --git a/vardict.wdl b/vardict.wdl index 1c20e51c..187b4567 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { String javaXmx = "16G" Int threads = 1 - String memory = "18G" + String memory = "18GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } diff --git a/vt.wdl b/vt.wdl index 85077dae..4da2d8cd 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,7 +29,7 @@ task Normalize { Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } diff --git a/whatshap.wdl b/whatshap.wdl index 7307ce7c..da86ad82 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,7 +38,7 @@ task Phase { String? threshold String? ped - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -109,7 +109,7 @@ task Stats { String? blockList String? chromosome - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -169,7 +169,7 @@ task Haplotag { String? regions String? sample - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" diff --git a/wisestork.wdl b/wisestork.wdl index 8fb4b76b..bef54e27 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,7 +31,7 @@ task Count { Int? binSize File? 
binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -69,7 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -129,7 +129,7 @@ task Newref { } runtime { - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } @@ -147,7 +147,7 @@ task Zscore { Int? binSize File? binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } From 5523913a18f121dcc524cac346dd82cf1162e804 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:37:42 +0200 Subject: [PATCH 211/439] Update changelog with memory change --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be0e5a7c..5f4fed5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` + previously. The WDL spec clearly distuingishes between SI and binary + notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and + `GiB` this means java tasks such as GATK, FastQC and Picard will always + receive enough memory now. + Purple's `somaticRainfallPlot` output is now optional and included in the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
From 75bb0cbcf2d2ccc57e8c5857f140cffe2a310c67 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Aug 2022 13:57:10 +0200 Subject: [PATCH 212/439] update survivor version --- CHANGELOG.md | 1 + survivor.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..b0b7c3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Updated SURVIVOR version to 1.0.7 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and diff --git a/survivor.wdl b/survivor.wdl index b233fb52..ae246f60 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,7 +33,7 @@ task Merge { String memory = "24GiB" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + String dockerImage = "quay.io/biocontainers/survivor:1.0.7--hd03093a_2" } command { From bf7aba3c332a8dcabc87d22e1740049ed4bf7db4 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 7 Oct 2022 17:59:35 +0200 Subject: [PATCH 213/439] add fastp --- fastp.wdl | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 fastp.wdl diff --git a/fastp.wdl b/fastp.wdl new file mode 100644 index 00000000..8cf99d99 --- /dev/null +++ b/fastp.wdl @@ -0,0 +1,101 @@ +verison 1.0 + +# MIT License +# +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom 
the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Fastp { + input { + File r1 + File r2 + String outputPathR1 + String outputPathR2 + String htmlPath + String jsonPath + + Int compressionLevel = 1 + Boolean correction = false + Int lengthRequired = 15 + Int? split + + Int threads = 4 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + } + + String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + + command { + set -e + mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + # predict output paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + fastp \ + -i ~{r1} \ + ~{"-I " + r2} \ + -o ~{outputPathR1} \ + ~{"-O " + outputPathR2} \ + -h ~{htmlPath} \ + -j ~{jsonPath} \ + -z ~{compressionLevel} \ + ~{if correction then "--correction" else ""} \ + --length_required ~{lengthRequired} \ + --threads ~{threads} \ + ~{"--split " + split} \ + ~{if defined(split) then "-d 0" else ""} + } + + Array[String] r1Paths = read_lines("r1_paths") + Array[String] r2Paths = read_lines("r2_paths") 
+ + output { + File htmlReport = htmlPath + File jsonReport = jsonPath + Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] + Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + r1: {description: "The R1 fastq file.", category: "required"} + r2: {description: "The R2 fastq file.", category: "required"} + outputPathR1: {description: "The output path for the R1 file.", category: "required"} + outputPathR2: {description: "The output path for the R2 file.", category: "required"} + htmlPath: {description: "The path to write the html report to.", category: "required"} + jsonPath: {description: "The path to write the json report to.", category: "required"} + compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} + correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} + lengthRequired: {description: "The minimum read length.", category: "advanced"} + split: {description: "The number of chunks to split the files into.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 64427306fbbf58eb3ca9b3850a223d06894c9391 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Oct 2022 12:13:08 +0200 Subject: [PATCH 214/439] fix some issues in fastp, add picard CollectInzertSizeMetrics --- fastp.wdl | 28 ++++++++++++++++------------ picard.wdl | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 8cf99d99..3063d012 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -1,4 +1,4 @@ -verison 1.0 +version 1.0 # MIT License # @@ -24,8 +24,8 @@ verison 1.0 task Fastp { input { - File r1 - File r2 + File read1 + File read2 String outputPathR1 String outputPathR2 String htmlPath @@ -35,24 +35,26 @@ task Fastp { Boolean correction = false Int lengthRequired = 15 Int? split + Boolean performAdapterTrimming = true Int threads = 4 String memory = "5GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") - command { + command <<< set -e mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ - -i ~{r1} \ - ~{"-I " + r2} \ + -i ~{read1} \ + ~{"-I " + read2} \ -o ~{outputPathR1} \ ~{"-O " + outputPathR2} \ -h ~{htmlPath} \ @@ -62,8 +64,9 @@ task Fastp { --length_required ~{lengthRequired} \ --threads ~{threads} \ ~{"--split " + split} \ - ~{if 
defined(split) then "-d 0" else ""} - } + ~{if defined(split) then "-d 0" else ""} \ + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + >>> Array[String] r1Paths = read_lines("r1_paths") Array[String] r2Paths = read_lines("r2_paths") @@ -76,15 +79,15 @@ task Fastp { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { - r1: {description: "The R1 fastq file.", category: "required"} - r2: {description: "The R2 fastq file.", category: "required"} + read1: {description: "The R1 fastq file.", category: "required"} + read2: {description: "The R2 fastq file.", category: "required"} outputPathR1: {description: "The output path for the R1 file.", category: "required"} outputPathR2: {description: "The output path for the R2 file.", category: "required"} htmlPath: {description: "The path to write the html report to.", category: "required"} @@ -93,6 +96,7 @@ task Fastp { correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into.", category: "common"} + performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index f762ecdd..6628cf0e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -136,6 +136,58 @@ task CollectHsMetrics { } } +task CollectInsertSizeMetrics { + input { + File inputBam + File inputBamIndex + + Float? 
minimumPercentage + String basename = "./insertSize_metrics" + + String memory = "5GiB" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectInsertSizeMetrics \ + I=~{inputBam} \ + O=~{basename}.txt \ + H=~{basename}.pdf \ + ~{"M=" + minimumPercentage} + } + + output { + File metricsTxt = "~{basename}.txt" + File metricsPdf = "~{basename}.pdf" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + minimumPercentage: {description: "Equivalent to picard CollectInsertSizeMetrics' `M` option.", category: "advanced"} + basename: {description: "The basename for the output files.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam From 346c0044a15279e1e3c5cd7140e24d9321255be8 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:21:07 +0200 Subject: [PATCH 215/439] fix fastp task --- fastp.wdl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 3063d012..c7a4d19f 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -48,7 +48,11 @@ task Fastp { command <<< set -e - mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + mkdir -p $(dirname ~{outputPathR1}) + mkdir -p $(dirname ~{outputPathR2}) + mkdir -p $(dirname ~{htmlPath}) + mkdir -p $(dirname ~{jsonPath}) + # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths @@ -68,14 +72,11 @@ task Fastp { ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> - Array[String] r1Paths = read_lines("r1_paths") - Array[String] r2Paths = read_lines("r2_paths") - output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] - Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] } runtime { From 5b55e1b657b4d6d9ee189317d7cc5054493ef863 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:26:38 +0200 Subject: [PATCH 216/439] typo --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index c7a4d19f..572de7dc 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ 
-66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --threads ~{threads} \ + --thread ~{threads} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} From 9dda4c842ac98d083bd9c9fdeec1e97437040e65 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 16:02:40 +0200 Subject: [PATCH 217/439] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 572de7dc..becbaf4b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "5GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From f8aa7e37593df2282161bc37c49a1d0b5039185b Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 18:06:18 +0200 Subject: [PATCH 218/439] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index becbaf4b..25f09e39 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "10GiB" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From e9215442ac12ff2f9ea4833b69daf809d8957cc6 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Fri, 21 Oct 2022 15:14:04 +0200 Subject: [PATCH 219/439] fastp: use number of splits as number of threads if set --- fastp.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 25f09e39..7f269d81 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,8 +38,8 @@ task Fastp { Boolean 
performAdapterTrimming = true Int threads = 4 - String memory = "20GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) + String memory = "50GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } @@ -66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{threads} \ + --thread ~{select_first([split, threads])} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} @@ -80,7 +80,7 @@ task Fastp { } runtime { - cpu: threads + cpu: select_first([split, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage @@ -96,9 +96,9 @@ task Fastp { compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} - split: {description: "The number of chunks to split the files into.", category: "common"} + split: {description: "The number of chunks to split the files into. Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c7754754273f3ae4ce4bb34a9211cafec7880306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 21 Oct 2022 16:48:34 +0200 Subject: [PATCH 220/439] Add a task to produce fasta indices --- biowdl.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/biowdl.wdl b/biowdl.wdl index f891618e..7392983a 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -73,3 +73,49 @@ task InputConverter { json: {description: "JSON file version of the input sample sheet."} } } + +task IndexFastaFile { + input { + File inputFile + String outputDir = "." + String javaXmx = "2G" + String memory = "3GiB" + } + String outputFile = outputDir + "/" + basename(inputFile) + # This executes both picard and samtools, so indexes are co-located in the same folder. + command <<< + set -e + mkdir -p ~{outputDir} + ln -s ~{inputFile} ~{outputFile} + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputFile}.dict" + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputFile + ".dict" + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + # Contains picard 2.27.4, samtools 1.15.1 + docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + } +} \ No newline at end of file From 8d5a451e1d3938f62d14add4167fcf83dd9a0e70 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 24 Oct 2022 09:45:54 +0200 Subject: [PATCH 221/439] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 26ab4e4a..5776dfed 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9iB" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" From f05d968d69d6c3a41b03a761a4a4838e5889df6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 14:45:15 +0200 Subject: [PATCH 222/439] Add a Bwa index task --- bwa.wdl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index d4f4495a..f79a219a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -114,3 +114,29 @@ struct BwaIndex { File fastaFile Array[File] indexFiles } + +task Index { + input { + File fasta + } + File indexedFile = "reference.fasta" + + command { + set -e + cp ~{fasta} ~{indexedFile} + bwa index ~{indexedFile} + } + + output { + BwaIndex index = { + "fastaFile": indexedFile, + "indexFiles": [ + indexedFile + ".amb", + indexedFile + ".ann", + indexedFile + ".bwt", + indexedFile + ".pac", + indexedFile + ".sa" + ] + } + } +} \ No newline at end of file From 23b324ea33f63cb4901fd66528f4ecead4cab0d5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:02:55 
+0200 Subject: [PATCH 223/439] Copy reference to prevent problems --- biowdl.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7392983a..fe49a6cf 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -86,7 +86,7 @@ task IndexFastaFile { command <<< set -e mkdir -p ~{outputDir} - ln -s ~{inputFile} ~{outputFile} + cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ From 4431b259d68024b057fe5cfd5dc4de2424450d4b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:46:09 +0200 Subject: [PATCH 224/439] Make sure index task works --- bwa.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f79a219a..a129ebb4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - File indexedFile = "reference.fasta" + String indexedFile = "reference.fasta" command { set -e @@ -128,9 +128,9 @@ task Index { } output { - BwaIndex index = { - "fastaFile": indexedFile, - "indexFiles": [ + BwaIndex index = object { + fastaFile: indexedFile, + indexFiles: [ indexedFile + ".amb", indexedFile + ".ann", indexedFile + ".bwt", @@ -139,4 +139,10 @@ task Index { ] } } + + runtime { + docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + cpu: 1 + memory: "~{size(fasta, 'G') + 1}GiB" + } } \ No newline at end of file From af929db9c2392cdc24a3ef2e7c644ca4d055cc3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:17:40 +0200 Subject: [PATCH 225/439] Use the basename of the input file for index names --- biowdl.wdl | 11 +++++------ bwa.wdl | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index fe49a6cf..58e94df8 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -77,27 +77,27 @@ task InputConverter { task IndexFastaFile { input { File inputFile - String outputDir = "." 
String javaXmx = "2G" String memory = "3GiB" } - String outputFile = outputDir + "/" + basename(inputFile) + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" # This executes both picard and samtools, so indexes are co-located in the same folder. command <<< set -e - mkdir -p ~{outputDir} cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputFile}.dict" + OUTPUT="~{outputDict}" samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai >>> output { File outputFasta = outputFile - File outputFastaDict = outputFile + ".dict" + File outputFastaDict = outputDict File outputFastaFai = outputFile + ".fai" } @@ -110,7 +110,6 @@ task IndexFastaFile { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - outputDir: {description: "Output directory path.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} # outputs diff --git a/bwa.wdl b/bwa.wdl index a129ebb4..8f694b45 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - String indexedFile = "reference.fasta" + String indexedFile = basename(fasta) command { set -e From 2dc14b39d06dcc1c8161a9bf5840ebe5d88ccb25 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:33:20 +0200 Subject: [PATCH 226/439] Make index use the basename of the file --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index e1b08173..bee38d11 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -470,7 +470,7 @@ task Sort { task Tabix { input { File inputFile - String outputFilePath = "indexed.vcf.gz" + String outputFilePath = basename(inputFile) String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) From 46bf6537c1787f47b7758d350b6605dae6da00cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 26 Oct 2022 14:38:17 +0200 Subject: [PATCH 227/439] Add indexing tasks to the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..d94c2b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From c6fe0300c5d2e5275739148c051f931e717cd6f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:38:30 +0200 Subject: [PATCH 228/439] Use samtools dict instead of Picard CreateSequenceDictionary --- CHANGELOG.md | 2 +- biowdl.wdl | 45 --------------------------------------------- samtools.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d94c2b56..b9df32a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a combined samtools dict and samtools faidx task. + Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary diff --git a/biowdl.wdl b/biowdl.wdl index 58e94df8..463dab75 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,49 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} - -task IndexFastaFile { - input { - File inputFile - String javaXmx = "2G" - String memory = "3GiB" - } - String outputFile = basename(inputFile) - # Capture .fa¸ .fna and .fasta - String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" - # This executes both picard and samtools, so indexes are co-located in the same folder. 
- command <<< - set -e - cp ~{inputFile} ~{outputFile} - picard -Xmx~{javaXmx} \ - -XX:ParallelGCThreads=1 \ - CreateSequenceDictionary \ - REFERENCE=~{inputFile} \ - OUTPUT="~{outputDict}" - samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai - >>> - - output { - File outputFasta = outputFile - File outputFastaDict = outputDict - File outputFastaFai = outputFile + ".fai" - } - - runtime { - memory: memory - # Contains picard 2.27.4, samtools 1.15.1 - docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" - } - - parameter_meta { - # inputs - inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - # outputs - outputFasta: {description: "Fasta file that is co-located with the indexes"} - outputFastaFai: {description: "Fasta index file for the outputFasta file."} - outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - } } \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index bee38d11..d5e3ce0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -66,6 +66,49 @@ task BgzipAndIndex { } } +task DictAndFaidx { + input { + File inputFile + String javaXmx = "2G" + String memory = "3GiB" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } + + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" + # This executes both dict and faidx, so indexes are co-located in the same folder. 
+ command <<< + set -e + cp ~{inputFile} ~{outputFile} + samtools dict -o ~{outputDict} ~{outputFile} + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputDict + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Faidx { input { File inputFile From 61161df05a65d5a3f3427d381254988208266c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:45:26 +0200 Subject: [PATCH 229/439] Add time_minutes dockerimage and update parameter_meta --- biowdl.wdl | 2 +- bwa.wdl | 16 ++++++++++++++-- samtools.wdl | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 463dab75..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,4 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} \ No newline at end of file +} diff --git a/bwa.wdl b/bwa.wdl index 8f694b45..e1e61bbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -118,6 +118,8 @@ struct BwaIndex { task Index { input { File fasta + String dockerImage = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + Int? timeMinutes = 5 + ceil(size(fasta, "G") * 5) } String indexedFile = basename(fasta) @@ -141,8 +143,18 @@ task Index { } runtime { - docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + docker: dockerImage cpu: 1 memory: "~{size(fasta, 'G') + 1}GiB" + time_minutes: timeMinutes + } + parameter_meta { + # inputs + fasta: {description: "Reference fasta file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + index: {description: "The produced BWA index."} } -} \ No newline at end of file +} diff --git a/samtools.wdl b/samtools.wdl index d5e3ce0e..76a07ef5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -71,6 +71,7 @@ task DictAndFaidx { File inputFile String javaXmx = "2G" String memory = "3GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -101,11 +102,12 @@ task DictAndFaidx { inputFile: {description: "The input fasta file.", category: "required"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputFasta: {description: "Fasta file that is co-located with the indexes"} outputFastaFai: {description: "Fasta index file for the outputFasta file."} outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 3c53b47f4ba4e2c75fc104dabe972a50332552e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 12:25:53 +0200 Subject: [PATCH 230/439] Add @DavyCats' suggestions --- bwa.wdl | 1 + samtools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index e1e61bbe..66b8e8cc 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -148,6 +148,7 @@ task Index { memory: "~{size(fasta, 'G') + 1}GiB" time_minutes: timeMinutes } + parameter_meta { # inputs fasta: {description: "Reference fasta file.", category: "required"} diff --git a/samtools.wdl b/samtools.wdl index 76a07ef5..df712e51 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -95,6 +95,8 @@ task DictAndFaidx { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + cpu: 1 } parameter_meta { From 0632414b9ae0663431e8a25b35463c9aa83badbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 4 Nov 2022 11:03:34 +0100 Subject: [PATCH 231/439] typo --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b522c02c..daf79c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ version 5.1.0-dev + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. - + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From 4a42403fb4bf27ba21f63b99c7cb75f9d13adfeb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 8 Nov 2022 16:33:30 +0100 Subject: [PATCH 232/439] Fallback to copying when hardlinking does not work --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index df712e51..587a53fb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,7 +332,7 @@ task Index { if [ ! -f ~{outputPath} ] then mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} + ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi samtools index ~{outputPath} ~{bamIndexPath} ' @@ -531,7 +531,7 @@ task Tabix { mkdir -p "$(dirname ~{outputFilePath})" if [ ! -f ~{outputFilePath} ] then - ln ~{inputFile} ~{outputFilePath} + ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi tabix ~{outputFilePath} -p ~{type} } From daf19317d6f5aafc4e156910393f8bf02c012199 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Nov 2022 15:27:37 +0100 Subject: [PATCH 233/439] remove second breakends in gridss AnnotateSvTypes script --- gridss.wdl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index cfe53751..9a09bdde 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,9 +119,14 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) - geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) - writeVcf(vcf, out_path, index=~{index}) + # GRIDSS doesn't supply a GT, simply set it to 0/1 + geno(vcf)$GT <- "0/1" + # Select only one breakend per event (also removes single breakends): + # sourceId ends with o or h for paired breakends, the first in the pair + # end with o the second with h. 
Single breakend end with b, these will + # also be removed since we can't determine the SVTYPE. + gr2 <- gr[grepl(".*o$", gr$sourceId)] + writeVcf(vcf[gr2$sourceId], out_path, index=~{index}) EOF >>> From 9cf522d5cf766ef7943226e8d4807643ee93721d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Nov 2022 16:00:25 +0100 Subject: [PATCH 234/439] fix typing issue in AnnotateSvTypes R code --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 9a09bdde..8e1474c1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -120,7 +120,7 @@ task AnnotateSvTypes { svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype # GRIDSS doesn't supply a GT, simply set it to 0/1 - geno(vcf)$GT <- "0/1" + geno(vcf)$GT <- as.matrix(sapply(row.names(vcf), function(x) {"0/1"})) # Select only one breakend per event (also removes single breakends): # sourceId ends with o or h for paired breakends, the first in the pair # end with o the second with h. Single breakend end with b, these will From 2e1c9972b01922cd915b7041b230e6287dda778b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Nov 2022 13:40:06 +0100 Subject: [PATCH 235/439] fix issue where fastp errors if split is set to 1 --- fastp.wdl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 7f269d81..db4a2d40 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -41,11 +41,15 @@ task Fastp { String memory = "50GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + + Int? noneInt } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt + command <<< set -e mkdir -p $(dirname ~{outputPathR1}) @@ -54,8 +58,8 @@ task Fastp { mkdir -p $(dirname ~{jsonPath}) # predict output paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ -i ~{read1} \ ~{"-I " + read2} \ @@ -66,21 +70,21 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{select_first([split, threads])} \ - ~{"--split " + split} \ - ~{if defined(split) then "-d 0" else ""} \ + --thread ~{select_first([effectiveSplit, threads])} \ + ~{"--split " + effectiveSplit} \ + ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] - Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] + Array[File] clippedR1 = if defined(effectiveSplit) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(effectiveSplit) then read_lines("r2_paths") else [outputPathR2] } runtime { - cpu: select_first([split, threads]) + cpu: select_first([effectiveSplit, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage From 636b1f0ea31168d9001ea7b45efe6d3333d944a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:12:45 
+0100 Subject: [PATCH 236/439] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index daf79c8f..2c4cff52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS AnnotateSvTypes task now also removes the second breakend of + the breakpoints and single breakends. This will prepare the output better + to be passed into survivor. + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. From b382cf745b6d7ed389bbca4efdfa70e37070d835 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:23:00 +0100 Subject: [PATCH 237/439] adjusted runtime attributes for clever tasks --- CHANGELOG.md | 6 ++++++ clever.wdl | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4cff52..d2e95f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Mateclever's runtime attribute defaults were changed to: + + memory: `"250GiB"` + + timeMinutes: `2880` ++ Clever's Prediction task's runtime attribute defaults were changed to: + + memory: `"80GiB"` + + timeMinutes: `2200` + The GRIDSS AnnotateSvTypes task now also removes the second breakend of the breakpoints and single breakends. This will prepare the output better to be passed into survivor. 
diff --git a/clever.wdl b/clever.wdl index 791a0ba1..3b819ed2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,8 +34,8 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15GiB" - Int timeMinutes = 600 + String memory = "250GiB" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,8 +94,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55GiB" - Int timeMinutes = 480 + String memory = "80GiB" + Int timeMinutes = 2200 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } From 522f2046d07479d1964de103f8d75a190a4a5292 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 11:48:50 +0100 Subject: [PATCH 238/439] increase time for Amber --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5776dfed..3b09beb9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "70GiB" String javaXmx = "64G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 78e02137e639dc35e24c6c9ac08a1efedfda7ebd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 17:12:07 +0100 Subject: [PATCH 239/439] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3b09beb9..e051dc99 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70GiB" - String javaXmx = "64G" + String memory = "85GiB" + String javaXmx = "80G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 1a80829e5bc6b9f607d3cb748f7af6c47e90f8bf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 16:37:32 +0100 Subject: [PATCH 240/439] Add targets file input to 
samtools view --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 587a53fb..8503777c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,7 @@ task View { Int? excludeFilter Int? excludeSpecificFilter Int? MAPQthreshold + File? targetFile Int threads = 1 String memory = "1GiB" @@ -593,6 +594,7 @@ task View { ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ ~{"--threads " + (threads - 1)} \ + ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} } From 1ad000b1370898459d2ef3d6e2b3939699874c4f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 17:07:32 +0100 Subject: [PATCH 241/439] update samtools containers --- samtools.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 8503777c..303f9821 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String 
dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 @@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -463,7 +463,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -576,7 +576,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputIndexPath = basename(outputFileName) + ".bai" From d686e0870442c002b7902e9a8f33467dc404fa14 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:15:45 +0100 Subject: [PATCH 242/439] Add parameter_meta for targetFile --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 303f9821..771a9969 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -621,6 +621,7 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} + targetFile: {description: "A BED file with regions to include", caegory: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From b52e3250eb5823b0ddbe4363eb3a77ab798d6fd0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:17:38 +0100 Subject: [PATCH 243/439] Update changelog with samtools change --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2e95f60..c6b5e609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Update samtools image to version 1.16. ++ Add targetsFile input for samtools View. 
+ Mateclever's runtime attribute defaults were changed to: + memory: `"250GiB"` + timeMinutes: `2880` From e1abb7dc92090bb836b6468be9ae33dc1696a44d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:21:22 +0100 Subject: [PATCH 244/439] Use latest version of scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From 9fce64caa41bf1cd0ec5e43337a31f3c8a8466cf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Jan 2023 12:07:38 +0100 Subject: [PATCH 245/439] add memory runtime attribute to tabix task --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 771a9969..fbb445e7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -542,6 +542,7 @@ task Tabix { } runtime { + memory: "2GiB" time_minutes: timeMinutes docker: dockerImage } From 5f5d51a3515b78c0d290e23a022255207c95bb7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 24 Jan 2023 16:37:48 +0100 Subject: [PATCH 246/439] add various tasks for somatic SV calling --- delly.wdl | 65 ++++++++++++++++++--- gridss.wdl | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 214 insertions(+), 12 deletions(-) diff --git a/delly.wdl b/delly.wdl index 7333c5ff..fab32784 100644 --- a/delly.wdl +++ b/delly.wdl @@ -22,15 +22,17 @@ version 1.0 task CallSV { input { - File bamFile - File bamIndex + Array[File]+ bamFile + Array[File]+ bamIndex File referenceFasta File referenceFastaFai String outputPath = "./delly/delly.bcf" + File? 
genotypeBcf + String memory = "15GiB" Int timeMinutes = 300 - String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } command { @@ -39,7 +41,8 @@ task CallSV { delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ - ~{bamFile} + ~{"-v " + genotypeBcf} \ + ~{sep=" " bamFile} } output { @@ -54,11 +57,12 @@ task CallSV { parameter_meta { # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamFile: {description: "The bam files to process.", category: "required"} + bamIndex: {description: "The indexes for the bam files.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -67,3 +71,50 @@ task CallSV { dellyBcf: {description: "File containing structural variants."} } } + + +task SomaticFilter { + input { + File dellyBcf + Array[String]+ normalSamples + Array[String]+ tumorSamples + String outputPath = "./delly/delly_filter.bcf" + + String memory = "15GiB" + Int timeMinutes = 300 + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + for SAMPLE in ~{sep=" " normalSamples}; do echo -e "${SAMPLE}\tcontrol" >> samples.tsv; done + for SAMPLE in ~{sep=" " tumorSamples}; do echo -e "${SAMPLE}\ttumor" >> samples.tsv; done + + delly filter \ + -f somatic \ + -o ~{outputPath} \ + -s samples.tsv \ + ~{dellyBcf} + >>> + + output { + File filterBcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} + tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file diff --git a/gridss.wdl b/gridss.wdl index 8e1474c1..647f2d67 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -152,11 +152,108 @@ task AnnotateSvTypes { } } +task FilterPon { + input { + File ponBed + File ponBedpe + Int minimumScore = 3 + String outputDir = "." + + String memory = "1GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 20 + } + + command { + set -e + mkdir -p ~{outputDir} + + cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed + cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + ponBed: {description: "The PON BED file.", category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GeneratePonBedpe { + input { + Array[File]+ vcfFiles + Array[File]+ vcfIndexes + File referenceFasta + String outputDir = "." + + Int threads = 8 + String javaXmx = "8G" + String memory = "9GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 120 + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} \ + -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ + gridss.GeneratePonBedpe \ + INPUT=~{sep=" INPUT=" vcfFiles} \ + O=~{outputDir}/gridss_pon_breakpoint.bedpe \ + SBO=~{outputDir}/gridss_pon_single_breakend.bed \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + THREADS=~{threads} + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { - File tumorBam - File tumorBai - String tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[String]+ tumorLabel BwaIndex reference String outputPrefix = "gridss" @@ -184,10 +281,10 @@ task GRIDSS { ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ - ~{tumorBam} + ~{sep=" " tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai # For some reason the VCF index is sometimes missing @@ -283,6 +380,60 @@ task GridssAnnotateVcfRepeatmasker { } } +task SomaticFilter { + input { + File vcfFile + File vcfIndex + File ponBed + File ponBedpe + String outputPath = "./high_confidence_somatic.vcf.gz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + + String memory = "16GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 60 + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + mkdir -p $(dirname ~{fullOutputPath}) + + gridss_somatic_filter \ + --pondir ~{dirname(ponBed)} \ + --input ~{vcfFile} \ + --output ~{outputPath} \ + --fulloutput ~{fullOutputPath} + } + + output { + File fullVcf = fullOutputPath + File fullVcfIndex = "~{fullOutputPath}.tbi" + File highConfidenceVcf = outputPath + File highConfidenceVcfIndex = "~{outputPath}.tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFile: {description: "The GRIDSS VCF file.", category: "required"} + vcfIndex: {description: "The index for the GRIDSS VCF file.", category: "required"} + ponBed: {description: "The PON BED file.", 
category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + outputPath: {description: "The path the high confidence output should be written to.", category: "common"} + fullOutputPath: {description: "The path the full output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Virusbreakend { input { File bam From 90bcc945807e9ef2c13fbd542d69f3b912995a0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 25 Jan 2023 14:06:10 +0100 Subject: [PATCH 247/439] fix lint issues --- gridss.wdl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 647f2d67..82ac7fbd 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,13 +164,13 @@ task FilterPon { Int timeMinutes = 20 } - command { + command <<< set -e mkdir -p ~{outputDir} cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe - } + >>> output { File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" @@ -189,8 +189,6 @@ task FilterPon { minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} outputDir: {description: "The directory the output will be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -394,13 +392,15 @@ task SomaticFilter { Int timeMinutes = 60 } + String ponDir = sub(ponBed, basename(ponBed), "") + command { set -e mkdir -p $(dirname ~{outputPath}) mkdir -p $(dirname ~{fullOutputPath}) gridss_somatic_filter \ - --pondir ~{dirname(ponBed)} \ + --pondir ~{ponDir} \ --input ~{vcfFile} \ --output ~{outputPath} \ --fulloutput ~{fullOutputPath} @@ -414,7 +414,6 @@ task SomaticFilter { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage From 34b3732319f7d74c72f93ff1bcb05ccc675585f8 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:47:10 +0100 Subject: [PATCH 248/439] Add a number of macs2 flags so we can adhere to Encode --- macs2.wdl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index e6a011ad..53be0abd 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -30,8 +30,15 @@ task PeakCalling { String sampleName String format = "AUTO" Boolean nomodel = false + String gensz = "hs" + Int extsize + Int shiftsize = -1*round(extsize/2) + Float pval_thres = 0.01 + Boolean bdg = true + String keepdup = "auto" + String callsummits = true Int timeMinutes = 600 # Default to 10 hours - String memory = "8GiB" + String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -43,7 +50,14 @@ task PeakCalling { --outdir ~{outDir} \ --name ~{sampleName} \ -f ~{format} \ - ~{true='--nomodel' false='' nomodel} + -g ~{gensz} \ + -p ~{pval_thres} \ + --shift ~{shiftsize} \ + --extsize ~{extsize} \ + ~{true='--nomodel' false='' nomodel} \ + ~{true='-B' 
false='' bdg} \ + --keep-dup ~{keepdup} \ + ~{true='--call-summits' false='' callsummits} } output { @@ -64,6 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} + bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ae937f28ab0147b572916c97448f6c788fa58e19 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:55:23 +0100 Subject: [PATCH 249/439] Fix data type error --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 53be0abd..854db814 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float pval_thres = 0.01 Boolean bdg = true String keepdup = "auto" - String callsummits = true + Boolean callsummits = true Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 2dca5f3611fd3aef0ee501cbe05467b590c93280 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 17:29:04 +0100 Subject: [PATCH 250/439] Address comments from Ruben --- macs2.wdl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 854db814..7b11c99f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,16 +29,16 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean nomodel = false - String gensz = "hs" - Int extsize - Int shiftsize = -1*round(extsize/2) - Float pval_thres = 0.01 - Boolean bdg = true - String keepdup = "auto" - Boolean callsummits = true + Boolean? nomodel + String? gensz + Int? extsize + Int? shiftsize = -1*round(extsize/2) + Float? pval_thres + Boolean? bdg + String? keepdup + Boolean? 
callsummits Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - -f ~{format} \ - -g ~{gensz} \ - -p ~{pval_thres} \ - --shift ~{shiftsize} \ - --extsize ~{extsize} \ + ~{"-f" + format} \ + ~{"-g" + gensz} \ + ~{"-p" + pval_thres} \ + ~{"--shift" + shiftsize} \ + ~{"--extsize" + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - --keep-dup ~{keepdup} \ + ~{"--keep-dup" + keepdup} \ ~{true='--call-summits' false='' callsummits} } From e89b1d7d13fef289ba17ee0f6acc8e8b5415a217 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Thu, 26 Jan 2023 10:31:15 +0100 Subject: [PATCH 251/439] Delete calculation for shiftsize --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 7b11c99f..8d89f3af 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -32,7 +32,7 @@ task PeakCalling { Boolean? nomodel String? gensz Int? extsize - Int? shiftsize = -1*round(extsize/2) + Int? shiftsize Float? pval_thres Boolean? bdg String? keepdup From e996878ae65113bc66add0caaf7b5d9efc75ad73 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:13 +0100 Subject: [PATCH 252/439] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 8d89f3af..70fea707 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean? nomodel + Boolean nomodel = false String? gensz Int? extsize Int? 
shiftsize From 055246a9082ec004ab335c7525685c888fd6e27f Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:24 +0100 Subject: [PATCH 253/439] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 70fea707..2c3bf57c 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -34,7 +34,7 @@ task PeakCalling { Int? extsize Int? shiftsize Float? pval_thres - Boolean? bdg + Boolean bdg = false String? keepdup Boolean? callsummits Int timeMinutes = 600 # Default to 10 hours From 72bbcce9084408ee7ba68a04dd8f121a8a793390 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:35 +0100 Subject: [PATCH 254/439] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 2c3bf57c..c4c08ed5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float? pval_thres Boolean bdg = false String? keepdup - Boolean? 
callsummits + Boolean callsummits = false Int timeMinutes = 600 # Default to 10 hours String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 4b9754f548b8558e7de2652e257edd807d0d4ffa Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:43 +0100 Subject: [PATCH 255/439] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index c4c08ed5..9d5344ae 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -78,13 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} - gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} - pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} - shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. 
Can be negative to indicate direction"} - extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} - bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} - keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} - callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced.", category: "advanced"} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value.", category: "advanced"} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction.", category: "advanced"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments.", category: "advanced"} + bdg: {description: "macs2 argument that enables the storage of the fragment pileup, control lambda in bedGraph files.", category: "advanced"} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location.", category: "advanced"} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ece0782a37451b82677eedd1ed771d823b56e891 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 11:26:19 +0100 Subject: [PATCH 256/439] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6b5e609..4962c687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. Inputs added: + + nomodel + + gensz + + extsize + + shiftsize + + pval_thres + + bdg + + keepdup + + callsummits + Update samtools image to version 1.16. + Add targetsFile input for samtools View. + Mateclever's runtime attribute defaults were changed to: From 2b4fb7ea3fc9270af1caaea897f35d2b319c35fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 26 Jan 2023 14:32:37 +0100 Subject: [PATCH 257/439] add missing parameter_meta --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 82ac7fbd..8b27df77 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -238,6 +238,7 @@ task GeneratePonBedpe { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 6d0329539033821b68ef31234ae7d6f920505aed Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:41:54 +0100 Subject: [PATCH 258/439] Add space between flag and the value following --- macs2.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 9d5344ae..5ccc5a5f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - ~{"-f" + format} \ - ~{"-g" + gensz} \ - ~{"-p" + pval_thres} \ - ~{"--shift" + shiftsize} \ - ~{"--extsize" + extsize} \ + ~{"-f " + format} \ + ~{"-g " + gensz} \ + ~{"-p " + pval_thres} \ + ~{"--shift " + shiftsize} \ + ~{"--extsize " + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - ~{"--keep-dup" + keepdup} \ + ~{"--keep-dup " + keepdup} \ ~{true='--call-summits' false='' callsummits} } From b79e59b1f3279bfcb26446ee5c95f1c6bfb4b16e Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:44:00 +0100 Subject: [PATCH 259/439] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4962c687..bd66a6ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel + gensz From ee0b137664a20f94997e9daad8b25cc2729dc88a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:25:12 +0100 Subject: [PATCH 260/439] increase time for manta, add index to delly outputs --- delly.wdl | 2 ++ manta.wdl | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/delly.wdl b/delly.wdl index fab32784..43af3ca0 100644 --- a/delly.wdl +++ b/delly.wdl @@ -47,6 +47,7 @@ task CallSV { output { File dellyBcf = outputPath + File dellyBcfIndex = outputPath + ".csi" } runtime { @@ -100,6 +101,7 @@ task SomaticFilter { output { File filterBcf = outputPath + File filterBcfIndex = outputPath + ".csi" } runtime { diff --git a/manta.wdl b/manta.wdl index 6804f304..fde8c208 100644 --- a/manta.wdl +++ b/manta.wdl @@ -34,7 +34,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -102,7 +102,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } From 1bf7725df8ff78628b3444d8ab6b6daa044836fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:28:02 +0100 Subject: [PATCH 261/439] add bcf index input for delly somatic filter --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index 43af3ca0..ab72f060 100644 --- a/delly.wdl +++ b/delly.wdl @@ -77,6 +77,7 @@ task CallSV { task SomaticFilter { input { File dellyBcf + File dellyBcfIndex Array[String]+ normalSamples Array[String]+ tumorSamples String outputPath = "./delly/delly_filter.bcf" From 9af2205811e0708be46be8e88bc1c7e1387fdfda Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:33:30 +0100 Subject: [PATCH 262/439] add index to delly call inputs as well --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index ab72f060..12e68187 100644 --- 
a/delly.wdl +++ b/delly.wdl @@ -29,6 +29,7 @@ task CallSV { String outputPath = "./delly/delly.bcf" File? genotypeBcf + File? genotypeBcfIndex String memory = "15GiB" Int timeMinutes = 300 From 71193e8da89c9275c7f6d878e349f1bdc19543ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:35:07 +0100 Subject: [PATCH 263/439] update parameter_meta --- delly.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 12e68187..2dc847b9 100644 --- a/delly.wdl +++ b/delly.wdl @@ -64,7 +64,8 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output BCF file should be written.", category: "common"} - genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples.", category: "advanced"} + genotypeBcfIndex: {description: "The index for the genotype BCF file.", category: "advanced"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -114,6 +115,7 @@ task SomaticFilter { parameter_meta { dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + dellyBcfIndex: {description: "The index for the delly BCF file.", category: "required"} normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} outputPath: {description: "The location the output BCF file should be written.", category: "common"} From dd9ea3db69c56bef6c1d5ed63c08e10e691c6d5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 11:13:40 +0100 Subject: [PATCH 264/439] give delly more time, specify normal ordinal in gridss GeneratePonBedpe command --- delly.wdl | 2 +- gridss.wdl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 2dc847b9..b952da7e 100644 --- a/delly.wdl +++ b/delly.wdl @@ -32,7 +32,7 @@ task CallSV { File? 
genotypeBcfIndex String memory = "15GiB" - Int timeMinutes = 300 + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } diff --git a/gridss.wdl b/gridss.wdl index 8b27df77..5c203a16 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -216,6 +216,7 @@ task GeneratePonBedpe { -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ gridss.GeneratePonBedpe \ INPUT=~{sep=" INPUT=" vcfFiles} \ + NO=0 \ O=~{outputDir}/gridss_pon_breakpoint.bedpe \ SBO=~{outputDir}/gridss_pon_single_breakend.bed \ REFERENCE_SEQUENCE=~{referenceFasta} \ From 48340415ab9c852ceefaf35e2b4e2ae8b47d3f66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 14:34:06 +0100 Subject: [PATCH 265/439] add missing fasta index input to gridss GeneratePonBedpe --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 5c203a16..03fdc6ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -200,6 +200,7 @@ task GeneratePonBedpe { Array[File]+ vcfFiles Array[File]+ vcfIndexes File referenceFasta + File referenceFastaFai String outputDir = "." 
Int threads = 8 @@ -238,6 +239,7 @@ task GeneratePonBedpe { parameter_meta { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceFastaFai: {description: "The index for the reference genome fasta.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 42796e37927b50b2dc25249a5ff92348ebf54ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:03:47 +0100 Subject: [PATCH 266/439] fix output paths gridss somatic filter --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 03fdc6ab..b67f4c91 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.gz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + String outputPath = "./high_confidence_somatic.vcf.bgz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" From b17076a642b17212499b6478e948661b0e9433c3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:23:37 +0100 Subject: [PATCH 267/439] fix gridss somatic filter output paths? 
--- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b67f4c91..5aca3825 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.bgz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" + String outputPath = "./high_confidence_somatic.vcf" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -411,10 +411,10 @@ task SomaticFilter { } output { - File fullVcf = fullOutputPath - File fullVcfIndex = "~{fullOutputPath}.tbi" - File highConfidenceVcf = outputPath - File highConfidenceVcfIndex = "~{outputPath}.tbi" + File fullVcf = "~{fullOutputPath}.bgz" + File fullVcfIndex = "~{fullOutputPath}.bgz.tbi" + File highConfidenceVcf = "~{outputPath}.bgz" + File highConfidenceVcfIndex = "~{outputPath}.bgz.tbi" } runtime { From d320b3c79bfc321fff1178ff571af520b7969043 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Feb 2023 14:11:59 +0100 Subject: [PATCH 268/439] add samples option to bcftools view --- bcftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 726d2e37..7df8911d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? 
include + Array[String] samples = [] String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -364,6 +365,7 @@ task View { ~{"--exclude " + exclude} \ ~{"--include " + include} \ ~{true="--exclude-uncalled" false="" excludeUncalled} \ + ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -389,6 +391,7 @@ task View { include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 3961ab4e858d31163987bb267cbad30ea085b205 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:51:45 +0100 Subject: [PATCH 269/439] Allow a custom separator char --- umi.wdl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index e7f01fc2..e4270ed6 100644 --- a/umi.wdl +++ b/umi.wdl @@ -29,6 +29,7 @@ task BamReadNameToUmiTag { File inputBam String outputPath = "output.bam" String umiTag = "RX" + String separatorChar = "_" String memory = "2GiB" Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) @@ -45,26 +46,26 @@ task BamReadNameToUmiTag { from typing import Tuple - def split_umi_from_name(name) -> Tuple[str, str]: + def split_umi_from_name(name, separator_char = "_") -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) id = id_and_rest[0] # If there was no whitespace id_and_rest will have length 1 other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" - underscore_index = id.rfind("_") + underscore_index = id.rfind(separator_char) umi = id[underscore_index + 1:] new_id = id[:underscore_index] if other_parts: return " ".join([new_id, other_parts]), umi return new_id, umi - def annotate_umis(in_file, out_file, bam_tag="RX"): + def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"): in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) + new_name, umi = split_umi_from_name(segment.query_name, separator_char) segment.query_name = new_name # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. 
# Value type has to be a string though, otherwise pysam crashes. @@ -72,7 +73,7 @@ task BamReadNameToUmiTag { out_bam.write(segment) if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}") pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> @@ -93,6 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 84a8781c4c94be08ba0f404902378d05db18fef9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:56:03 +0100 Subject: [PATCH 270/439] Update changelog with separatorChar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd66a6ba..4bab712a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel From b3c9204b77851836042190486f8031dbe79a9e2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:57:25 +0100 Subject: [PATCH 271/439] Add missing interpunction --- umi.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index e4270ed6..0628783a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -94,7 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} - separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c3f246f24d05bda4ebfa781cff41dfe61bbf85b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:05:31 +0100 Subject: [PATCH 272/439] update changelog --- CHANGELOG.md | 2 ++ scripts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..3021817d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a task for fastp. ++ Add a task for picard CollectInsertSizeMetrics. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From ad97efa05229f147435ee0800b0a742a2c360435 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:06:00 +0100 Subject: [PATCH 273/439] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From 669428627e26aaaafdba3ab680a37236eaa736da Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:13 +0100 Subject: [PATCH 274/439] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..6e1daf97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. ++ Add a task for GRIDSS somatic filtering. ++ Add a task to generate a panel of normals BED and BEDPE file for GRIDSS. ++ Add a task to filter a GRIDSS PON. ++ Add a task for delly somatic filtering. ++ Delly CallSV's `bamFile` and `bamIndex` inputs are now arrays of files, allowing + for multiple samples to be included. ++ Add `samples` input to bcftools view to select samples included in the output vcf. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: From 7b9e07652461788748ed4907dd8264cbbb27ce80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:49 +0100 Subject: [PATCH 275/439] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 84690a30..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From eba9ad4c057cf7468bd7982930af484765d1a257 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Feb 2023 15:55:01 +0100 Subject: [PATCH 276/439] add some options to disable filters in fastp --- fastp.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index db4a2d40..68c0e5cd 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -36,6 +36,8 @@ task Fastp { Int lengthRequired = 15 Int? split Boolean performAdapterTrimming = true + Boolean performQualityFiltering = true + Boolean performLengthFiltering = true Int threads = 4 String memory = "50GiB" @@ -73,7 +75,9 @@ task Fastp { --thread ~{select_first([effectiveSplit, threads])} \ ~{"--split " + effectiveSplit} \ ~{if defined(effectiveSplit) then "-d 0" else ""} \ - ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ + ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ + ~{if performLengthFiltering then "" else "--disable_length_filtering"} >>> output { @@ -102,6 +106,8 @@ task Fastp { lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into. 
Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} + performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} + performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From ab54bb588cd66f009df79bbf00b2238f0436fad6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 14:11:06 +0100 Subject: [PATCH 277/439] add option to enable/disable ploy-g trimming to fastp task --- fastp.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 68c0e5cd..9849738b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,6 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Boolean performQualityFiltering = true Boolean performLengthFiltering = true + Boolean? performPolyGTrimming Int threads = 4 String memory = "50GiB" @@ -50,6 +51,11 @@ task Fastp { String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + String polyGTrimmingFlag = if defined(performPolyGTrimming) + then + if select_first([performPolyGTrimming]) then "--trim_poly_g" else "--disable_trim_poly_g" + else "" + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt command <<< @@ -77,7 +83,8 @@ task Fastp { ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ - ~{if performLengthFiltering then "" else "--disable_length_filtering"} + ~{if performLengthFiltering then "" else "--disable_length_filtering"} \ + ~{polyGTrimmingFlag} >>> output { @@ -108,6 +115,7 @@ task Fastp { performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} + performPolyGTrimming: {description: "Whether or not poly-G-tail trimming should be performed. If undefined fastp's default behaviour will be used, ie. enabled for NextSeq/NovaSeq data as detected from read headers.", category: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 5d35105b452167ab9e09a9b0d9c041d2af84f253 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 16:30:34 +0100 Subject: [PATCH 278/439] add purple options needed for shallow mode --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index e051dc99..78156f67 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1070,6 +1070,8 @@ task Purple { File driverGenePanel File somaticHotspots File germlineHotspots + Float? highlyDiploidPercentage + Float? 
somaticMinPuritySpread #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -1103,6 +1105,8 @@ task Purple { -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ -driver_gene_panel ~{driverGenePanel} \ + ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ + ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ -threads ~{threads} } From 36a4575e20c54b062995b96c24f68733affce707 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Mar 2023 11:14:24 +0100 Subject: [PATCH 279/439] update parameter_meta and changelog --- CHANGELOG.md | 2 ++ hmftools.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce03ffc..753daf30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the + hmtools PURPLE task. + Add a task for fastp. + Add a task for picard CollectInsertSizeMetrics. + Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. 
diff --git a/hmftools.wdl b/hmftools.wdl index 78156f67..c27630a1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1186,6 +1186,8 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + highlyDiploidPercentage: {description: "Equivalent to PURPLE's `-highly_diploid_percentage` option.", category: "advanced"} + somaticMinPuritySpread: {description: "Equivalent to PURPLE's `-somatic_min_purity_spread` option.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 1a57c2ed292504f138d8bb15ae145b7145ba6c1c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:08:41 +0200 Subject: [PATCH 280/439] Set stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 753daf30..7e62171b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.1.0-dev +version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the hmtools PURPLE task. From 9394a3e29a0227e3dc1dc30700ad1d7e65b7e448 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:10:35 +0200 Subject: [PATCH 281/439] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 09b254e9..91ff5727 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +5.2.0 From 64aa91e7db5e96625122b4484fb7d857a9ef2c13 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:32:09 +0200 Subject: [PATCH 282/439] Update cutadapt and FastQC --- CHANGELOG.md | 6 ++++++ cutadapt.wdl | 2 +- fastqc.wdl | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e62171b..a13b2f6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> + +version 5.2.0-dev +--------------------------- ++ Update cutadapt version to 4.4 ++ Update FastQC version to 0.12.1 + version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the diff --git a/cutadapt.wdl b/cutadapt.wdl index 9a67692c..191e6f0a 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:4.4--py310h1425a21_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/fastqc.wdl b/fastqc.wdl index d821e531..59592d4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" + String dockerImage = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" Array[File]? noneArray File? 
noneFile From 5cf560b5a9e69ba683c431193c330fdb7a41c028 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:55:44 +0200 Subject: [PATCH 283/439] Update classpath --- fastqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 59592d4e..da31882c 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -67,7 +67,7 @@ task Fastqc { command <<< set -e mkdir -p "~{outdirPath}" - FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + FASTQC_DIR="/usr/local/opt/fastqc-0.12.1" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ -Xms200M -Xmx~{javaXmx} \ From 0ed76c14ffe5ab4779ed42f924fbcab1acdda266 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:46:55 +0200 Subject: [PATCH 284/439] Stable version in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a13b2f6c..1551d13d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.2.0-dev +version 5.2.0 --------------------------- + Update cutadapt version to 4.4 + Update FastQC version to 0.12.1 From 73f769bb966f67b9bf3fd72b9f5c4d6f923ccafa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:52:47 +0200 Subject: [PATCH 285/439] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 91ff5727..03f488b0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.2.0 +5.3.0 From 0062b727197ae2601b234d7a69ae0f64bd7b59d1 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:01:16 +0200 Subject: [PATCH 286/439] Add revcomp flag to cutadapt --- CHANGELOG.md | 5 +++++ cutadapt.wdl | 3 +++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1551d13d..5eb2ef17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.3.0-dev +--------------------------- ++ Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. + + version 5.2.0 --------------------------- + Update cutadapt version to 4.4 diff --git a/cutadapt.wdl b/cutadapt.wdl index 191e6f0a..a164e360 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,6 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? 
noZeroCap + Boolean revcomp = false Int cores = 4 String memory = "5GiB" @@ -149,6 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ + ~{if revcomp then "--revcomp" else ""} ~{read1} \ ~{read2} \ ~{"> " + reportPath} @@ -231,6 +233,7 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + revcomp: {description: "Equivalent to cutadapt's --revcomp flag.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 502d73003072327d9756b4b2ce0c2f768ff1192a Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:02:14 +0200 Subject: [PATCH 287/439] add missing backslash --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index a164e360..c695c08e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -150,7 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ - ~{if revcomp then "--revcomp" else ""} + ~{if revcomp then "--revcomp" else ""} \ ~{read1} \ ~{read2} \ ~{"> " + reportPath} From cebb1b535be90193ed27c57f3ea2c659f20bfe39 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:20:13 +0200 Subject: [PATCH 288/439] add a task for fastqFilter --- fastqFilter.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 fastqFilter.wdl diff --git a/fastqFilter.wdl b/fastqFilter.wdl new file mode 100644 index 
00000000..d436b1ab --- /dev/null +++ b/fastqFilter.wdl @@ -0,0 +1,66 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2023 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task FastqFilter { + input { + Array[File]+ fastq + Array[String]+ outputPaths + Int? minLength + Int? 
maxLength + + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(seqFile, "G")) + String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" + } + + command { + set -e + mkdir -p $(dirname ~{sep=" " outputPaths}) + fastq-filter \ + -o ~{sep=" -o " outputPaths} \ + ~{"-l " + minLength} \ + ~{"-L " + maxLength} \ + ~{sep=" " fastq} + } + + output { + Array[File] filtered = outputPaths + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + fastq: {description: "A list of fastq files to filter.", category: "required"} + outputPaths: {description: "A list containing the output paths for each input fastq file.", category: "required"} + minLength: {description: "Equivalent to fastq-filter's `--min-length` option.", category: "common"} + maxLength: {description: "Equivalent to fastq-filter's `--max-length` option.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 93e491d37de5780bea73010323dcef939814cdbc Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:21:47 +0200 Subject: [PATCH 289/439] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eb2ef17..34bf0600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 3fc46b91cc63c31b1477692638492fdda9bbc084 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:32:34 +0200 Subject: [PATCH 290/439] fix copy-paste error --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index d436b1ab..2b2fcc45 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -30,7 +30,7 @@ task FastqFilter { Int? maxLength String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(seqFile, "G")) + Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 2bc4c06dd89444b6ccb42244a566873ba7fad5a2 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Tue, 13 Jun 2023 09:37:54 +0200 Subject: [PATCH 291/439] use 1GiB for fastqFilter --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index 2b2fcc45..3701b8aa 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -29,7 +29,7 @@ task FastqFilter { Int? minLength Int? maxLength - String memory = "4GiB" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 3fb2c1de2e19f68f7a3ab3e205864bff21bb3ba1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <15814544+Redmar-van-den-Berg@users.noreply.github.com> Date: Thu, 7 Sep 2023 08:48:09 +0200 Subject: [PATCH 292/439] Use softlink instead of hardlinks If the database files are on a different filesystem than the analysis folder, hardlinks are not allowed, leading to crashes. 
--- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 757af239..41a907ae 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -122,7 +122,7 @@ task Classify { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge \ ~{inputFormatOptions[inputFormat]} \ @@ -199,7 +199,7 @@ task Inspect { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-inspect \ ~{outputOptions[printOption]} \ @@ -256,7 +256,7 @@ task KReport { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-kreport \ -x $PWD/${indexBasename} \ From 44cdc1862bf20b1cf77f0fedfb0ba25b3e5efa43 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 7 Sep 2023 08:52:12 +0200 Subject: [PATCH 293/439] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34bf0600..6acbbc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 7404b0e6f7470c4d04d80f7037f1068ad091d9ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 26 Aug 2024 17:07:03 +0200 Subject: [PATCH 294/439] Add a selectGenotype switch --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 0b93efe6..a2aff322 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1514,6 +1514,7 @@ task SelectVariants { Array[File] intervals = [] String? selectTypeToInclude + String? selectGenotype String javaXmx = "4G" String memory = "5GiB" @@ -1529,6 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ + ~{"-select-genotype " + selectGenotype} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From d86d9cb89a8f8b74ad2b714a23e1686fd4f26e3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 27 Aug 2024 10:19:18 +0200 Subject: [PATCH 295/439] Quote select genotype value --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index a2aff322..f272a2f9 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1530,7 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ - ~{"-select-genotype " + selectGenotype} \ + ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 558c9b7d7370b0f46346c16beaa4d4cb3f48b09e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 30 Aug 2024 15:23:55 +0200 Subject: [PATCH 296/439] Add exclude filtered expression --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index f272a2f9..230674a5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1513,6 +1513,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" Array[File] intervals = [] + Boolean excludeFiltered = false String? selectTypeToInclude String? 
selectGenotype @@ -1531,6 +1532,7 @@ task SelectVariants { -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ + ~{true="--exclude-filtered" false="" excludeFiltered} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 75f36133cb52ce6f02701ff11612f6884a8d1726 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 18 Oct 2024 14:52:33 +0200 Subject: [PATCH 297/439] Use reference files in rtg-tools tasks to make tasks cacheable --- rtg.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index 3e9dab9b..62e1e77f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -24,8 +24,7 @@ task Format { input { Array[File]+ inputFiles String format = "fasta" - String outputPath = "seq_data.sdf" - + String outputPath = "reference_data" String rtgMem = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) @@ -41,7 +40,7 @@ task Format { } output { - File sdf = outputPath + Array[File] referenceFiles = glob("~{outputPath}/*") } runtime { @@ -61,7 +60,7 @@ task Format { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} + referenceFiles: {description: "An array with all the generated reference files"} } } @@ -74,7 +73,7 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" String outputDir = "output/" - File template + Array[File] referenceFiles Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false @@ -99,7 +98,7 @@ task VcfEval { ~{"--evaluation-regions " + evaluationRegions} \ ~{"--bed-regions " + bedRegions} \ --output ~{outputDir} \ - --template ~{template} \ + --template $(dirname ~{referenceFiles[0]}) \ ~{true="--all-records" false="" allRecords} \ ~{true="--decompose" false="" decompose} \ ~{true="--ref-overlap" false="" refOverlap} \ @@ -152,7 +151,7 @@ task VcfEval { squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} outputDir: {description: "Directory for output.", category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + referenceFiles: {description: "An array of reference Files generated by the Format task.", category: "required"} allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} From 53d5083e5ca9de973eba1916dc273e0ff3dd9e04 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:04:31 +0100 Subject: [PATCH 298/439] Update minimap2 task to output sorted BAM --- minimap2.wdl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index 96cc7734..47464585 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -81,15 +81,19 @@ task Indexing { task Mapping { input { String presetOption - Int kmerSize = 15 - Boolean skipSelfAndDualMappings = false - Boolean outputSam = false String outputPrefix - Boolean addMDTagToSam = false - Boolean secondaryAlignment = false File referenceFile File queryFile + + Int compressionLevel = 1 + Int additionalSortThreads = 1 + Int sortMemoryGb = 1 + Boolean skipSelfAndDualMappings = false + Boolean addMDTagToSam = false + Boolean secondaryAlignment = true + + Int? kmerSize Int? maxIntronLength Int? maxFragmentLength Int? retainMaxSecondaryAlignments @@ -97,8 +101,8 @@ task Mapping { Int? mismatchPenalty String? 
howToFindGTAG - Int cores = 4 - String memory = "30GiB" + Int cores = 8 + String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -108,13 +112,11 @@ task Mapping { mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -x ~{presetOption} \ - -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSam} \ - -o ~{outputPrefix} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ + ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ ~{"-N " + retainMaxSecondaryAlignments} \ @@ -122,11 +124,18 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} + ~{queryFile} \ + | samtools sort \ + -@ ~{additionalSortThreads} \ + -l ~{compressionLevel} \ + -m ~{sortMemoryGb}G \ + -o ~{outputPrefix}.bam + samtools index -o ~{outputPrefix}.bam } output { - File alignmentFile = outputPrefix + File bam = ~{outputPrefix}.bam + File bamIndex = ~{outputPrefix}.bam.bai } runtime { From 77506d8d208b524cfb2427314d4568aac75e4b87 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:05:39 +0100 Subject: [PATCH 299/439] Add a flag for namesorting --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 47464585..64313ef4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -88,6 +88,7 @@ task Mapping { Int compressionLevel = 1 Int additionalSortThreads = 1 Int sortMemoryGb = 1 + Boolean nameSorted = false Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false @@ -126,6 +127,7 @@ task Mapping { ~{referenceFile} \ ~{queryFile} \ | samtools sort \ + ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ From e78cfa0c198a65d60f6b1adb3e33878c02e5c90f Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Fri, 8 Nov 2024 11:18:46 +0100 Subject: [PATCH 300/439] Add clair3 task --- clair3.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 clair3.wdl diff --git a/clair3.wdl b/clair3.wdl new file mode 100644 index 00000000..eb18d208 --- /dev/null +++ b/clair3.wdl @@ -0,0 +1,61 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Clair3 { + input { + File bam + File bamIndex + File referenceFasta + File referenceFastaFai + String outputPrefix + File? model + String? 
builtinModel + String platform + Int threads = 8 + Boolean includeAllCtgs = false + String memory = "20GiB" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + } + + # A default set for testing + String modelArg = "~{true=model false=builtinModel, defined(model)}" + + command <<< + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{reference_fasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{true="--include_all_ctgs" false =""} + mv out/merge_output.vcf.gz ~{prefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + >>> + output { + File vcf = "~{outputPrefix}.vcf.gz" + File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" + } + + +} \ No newline at end of file From 0d84d673368819a78296f97f0f5b6c3225439ded Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:20:35 +0100 Subject: [PATCH 301/439] Add sequali and update multiqc to a version that supports it --- multiqc.wdl | 2 +- sequali.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 sequali.wdl diff --git a/multiqc.wdl b/multiqc.wdl index 21fc8a7d..f04a1021 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl new file mode 100644 index 00000000..98700fb7 --- /dev/null +++ b/sequali.wdl @@ -0,0 +1,46 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sequali { + input { + File reads + File? mate_reads + Int threads = 2 + String outDir = "." 
+ dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + } + + command <<< + set -e + mkdir -p $(dirname outputDir) + sequali \ + --outdir ~{outDir} \ + --threads ~{threads} \ + ~{reads} \ + ~{mate_reads} + >>> + + output { + File html = basename(reads) + ".html" + File json = basename(reads) + ".json" + } +} \ No newline at end of file From 272842244d79797615aa430bb6836a8cb78ba8fd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:45:59 +0100 Subject: [PATCH 302/439] Fix womtool validation errors --- clair3.wdl | 16 ++++++++-------- minimap2.wdl | 8 ++++---- sequali.wdl | 9 ++++++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index eb18d208..6c0c1d38 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -33,25 +33,25 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "20GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } - # A default set for testing - String modelArg = "~{true=model false=builtinModel, defined(model)}" + String modelArg = "~{true=model false=builtinModel defined(model)}" command <<< run_clair3.sh \ --model=~{modelArg} \ - --ref_fn=~{reference_fasta} \ + --ref_fn=~{referenceFasta} \ --bam_fn=~{bam} \ --output=out \ --threads=~{threads} \ --platform=~{platform} \ - ~{true="--include_all_ctgs" false =""} - mv out/merge_output.vcf.gz ~{prefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> + output { File vcf = "~{outputPrefix}.vcf.gz" File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" diff --git a/minimap2.wdl b/minimap2.wdl index 64313ef4..fff5b4ec 
100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -136,8 +136,8 @@ task Mapping { } output { - File bam = ~{outputPrefix}.bam - File bamIndex = ~{outputPrefix}.bam.bai + File bam = "~{outputPrefix}.bam " + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { @@ -152,7 +152,6 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} @@ -170,6 +169,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} + bam: {description: "Mapping and alignment between collections of dna sequences file in BAM format."} + bamIndex: {description: "Accompanying index file for the BAM file."} } } diff --git a/sequali.wdl b/sequali.wdl index 98700fb7..c2eff2c9 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -26,7 +26,7 @@ task Sequali { File? mate_reads Int threads = 2 String outDir = "." 
- dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" } command <<< @@ -43,4 +43,11 @@ task Sequali { File html = basename(reads) + ".html" File json = basename(reads) + ".json" } + + runtime { + cpu: threads + memory: "2GiB" + docker: dockerImage + time_minutes: 59 + } } \ No newline at end of file From 01ff19c51bf4b8ff28cf16b067bbb128d2d435b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:12:56 +0100 Subject: [PATCH 303/439] Fix runtime issues --- clair3.wdl | 12 ++++++++++-- minimap2.wdl | 12 +++++++----- multiqc.wdl | 2 +- sequali.wdl | 4 ++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 6c0c1d38..2d111a5d 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,12 +34,14 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{true=model false=builtinModel defined(model)}" + String modelArg = "~{if defined(model) then model else builtinModel}" command <<< + set -e + mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ --ref_fn=~{referenceFasta} \ @@ -57,5 +59,11 @@ task Clair3 { File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" } + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } } \ No newline at end of file diff --git a/minimap2.wdl b/minimap2.wdl index fff5b4ec..5709c998 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -105,13 +105,15 @@ task Mapping { Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + # Minimap 2.28 samtools 1.20 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ + -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ @@ -125,19 +127,19 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} \ + ~{queryFile} \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam - samtools index -o ~{outputPrefix}.bam + samtools index ~{outputPrefix}.bam } output { - File bam = "~{outputPrefix}.bam " - File bamIndex = "~{outputPrefix}.bam.bai" + File bam = "~{outputPrefix}.bam" + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { diff --git a/multiqc.wdl b/multiqc.wdl index f04a1021..a2e32cdb 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl index c2eff2c9..ed6e5d40 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -40,8 +40,8 @@ task Sequali { >>> output { - File html = basename(reads) + ".html" - File json = basename(reads) + ".json" + File html = outDir + "/" + basename(reads) + ".html" + File json = outDir + "/" + basename(reads) + ".json" } runtime { From a488618740428dcc7e940a6b27750ff62b87428e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:28:56 +0100 Subject: [PATCH 304/439] Include all contigs by default for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 2d111a5d..d824ec13 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = false + Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 1bc3416c90953ba05d3e00370c74355ad0fa7c9b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 15:17:14 +0100 Subject: [PATCH 305/439] Work from a model tar file --- clair3.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index d824ec13..7b2d98fe 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,7 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix - File? model + File? modelTar String? 
builtinModel String platform Int threads = 8 @@ -37,10 +37,11 @@ task Clair3 { String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{if defined(model) then model else builtinModel}" + String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ From 8fa481125d3038034a2ae28fedf88809b10e0c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 Nov 2024 14:30:25 +0100 Subject: [PATCH 306/439] Set includeAllCtgs to false --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 7b2d98fe..bc25394b 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. 
+ Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 98d9e2c92b0655eb022bd9793b3449ba3eb52b9f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 12 Nov 2024 08:38:00 +0100 Subject: [PATCH 307/439] Increase memory --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index bc25394b..4184f49e 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -32,7 +32,7 @@ task Clair3 { String platform Int threads = 8 Boolean includeAllCtgs = false - String memory = "20GiB" + String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } From f13a7e2dbe793b2742080b91d90e42b29f6c0e6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:47:03 +0100 Subject: [PATCH 308/439] Update parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 230674a5..655a0b66 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1558,6 +1558,8 @@ task SelectVariants { outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + excludeFiltered: {description: "Remove all variants that do not have a PASS filter", category: "advanced"} + selectGenotype: {description: "The genotype to be selected", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3c8d2e73d12d9cd3101752dff2976f86d61b4c23 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:48:14 +0100 Subject: [PATCH 309/439] Update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..6db06e23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ rtg Format and VcfEval tasks now handle reference as an array of files to enable caching. ++ Added --select-genotype and --exclude-filtered flags to GATK SelectVariants + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. - version 5.2.0 --------------------------- + Update cutadapt version to 4.4 From a6eec0e6af6554ba1c85a24e3a63b0bcd01cfe76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Dec 2024 15:51:29 +0100 Subject: [PATCH 310/439] Add a readgroup flag to minimap2 --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 5709c998..e785ffd7 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -101,6 +101,7 @@ task Mapping { Int? matchingScore Int? mismatchPenalty String? howToFindGTAG + String? 
readgroup Int cores = 8 String memory = "24GiB" @@ -126,6 +127,7 @@ task Mapping { ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ + ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ ~{queryFile} \ | samtools sort \ From b717f3fa8d82d3bb040d3df134533839f5adec9d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 17 Dec 2024 17:33:59 +0100 Subject: [PATCH 311/439] Add -o pipefail --- minimap2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minimap2.wdl b/minimap2.wdl index e785ffd7..95b84bc4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -111,7 +111,7 @@ task Mapping { } command { - set -e + set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -a \ From 42ca869223960072ca0f9fc1e87aae7f469a4d34 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 8 Jan 2025 17:35:29 +0100 Subject: [PATCH 312/439] Allow copying of comments from fastq --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 95b84bc4..daf47a9a 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -93,6 +93,7 @@ task Mapping { Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true + Boolean copyCommentsFromFastq = true Int? kmerSize Int? 
maxIntronLength @@ -119,6 +120,7 @@ task Mapping { ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{true="-y" false="" copyCommentsFromFastq} \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ From 7240b178ef378d39b5cb0983cf3a681b0bf52488 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 15:53:23 +0100 Subject: [PATCH 313/439] Allow minimap2 to process uBAM --- minimap2.wdl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index daf47a9a..18127cb1 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -89,11 +89,19 @@ task Mapping { Int additionalSortThreads = 1 Int sortMemoryGb = 1 Boolean nameSorted = false + # MM, ML, MN -> Methylation flags + # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. + # ch -> channel + # st -> start time + # du -> duration + # dx -> Whether read was duplex + # pi -> Parent ID for split read + + String tagsToKeep = "MM,ML,MN,ch,st,du,dx,pi" Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true - Boolean copyCommentsFromFastq = true Int? kmerSize Int? maxIntronLength @@ -111,16 +119,21 @@ task Mapping { String dockerImage = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } - command { + # Always run data through samtools fastq. This supports both FASTQ and uBAM + # files. It does remove any existing FASTQ comments, but this should not be + # problematic for most files. 
+ + command <<< set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" + samtools fastq -T "~{tagsToKeep}" ~{queryFile} | \ minimap2 \ -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{true="-y" false="" copyCommentsFromFastq} \ + -y \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ @@ -131,7 +144,7 @@ task Mapping { ~{"-u " + howToFindGTAG} \ ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ - ~{queryFile} \ + - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ @@ -139,7 +152,7 @@ task Mapping { -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam samtools index ~{outputPrefix}.bam - } + >>> output { File bam = "~{outputPrefix}.bam" @@ -168,6 +181,7 @@ task Mapping { retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} + tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From c7c1b5bb932de4ea6d1ca3069007d4e1ad5c168d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:04:08 +0100 Subject: [PATCH 314/439] Allow sample name to set --- clair3.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clair3.wdl b/clair3.wdl index 4184f49e..db2c2fb5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,6 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix + String? sampleName File? 
modelTar String? builtinModel String platform @@ -50,6 +51,7 @@ task Clair3 { --output=out \ --threads=~{threads} \ --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ ~{true="--include_all_ctgs" false ="" includeAllCtgs} mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi From e3ceb602b5baf955f850f30301a68bc1a1a1c970 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:47:20 +0100 Subject: [PATCH 315/439] Proper numshards to deepvariant and update it to latest version --- deepvariant.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 25d05bd9..2d212000 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -27,19 +27,19 @@ task RunDeepVariant { File inputBam File inputBamIndex String modelType - String outputVcf + String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int? numShards + Int numShards = 4 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean? 
VCFStatsReport = true - String memory = "3GiB" + String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.0.0" + String dockerImage = "google/deepvariant:1.6.1" } command { @@ -62,6 +62,7 @@ task RunDeepVariant { memory: memory time_minutes: timeMinutes docker: dockerImage + cpu: numShards } output { From a5dca2e7596f50436beb6c69b597722dc4aaa764 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 11:28:46 +0100 Subject: [PATCH 316/439] Add modkit pileup --- modkit.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 modkit.wdl diff --git a/modkit.wdl b/modkit.wdl new file mode 100644 index 00000000..4ac6bfa6 --- /dev/null +++ b/modkit.wdl @@ -0,0 +1,64 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Pileup { + input { + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + File bam + File bamIndex + String outputBed = "output.bed" + File referenceFasta + File referenceFastaFai + + Int? intervalSize + File? includeBed + + Boolean cpg = false + Boolean combineMods = false + String logFilePath = "modkit.log" + + Int threads = 4 + + } + + command <<< + set -e + mkdir -p $(dirname ~{outputBed}) + mkdir -p $(dirname ~{logFilePath}) + modkit pileup \ + --threads ~{threads} \ + ~{"--interval-size " + intervalSize} \ + ~{"--include-bed " + includeBed} + --ref ~{referenceFasta} \ + ~{true="--cpg" false="" cpg} \ + ~{true="--combine-mods" false="" combineMods} \ + --log-filepath ~{logFilePath} \ + ~{bam} \ + ~{outputBed} + >>> + + runtime { + docker: dockerImage + cpu: threads + + } +} \ No newline at end of file From 085fc5dd691444c9bcdb6c0483413ce5c1cf8d5f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:15:28 +0100 Subject: [PATCH 317/439] Update modkit --- modkit.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 4ac6bfa6..9f311121 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -56,6 +56,11 @@ task Pileup { ~{outputBed} >>> + output { + File out = outputBed + File logFile = logFilePath + } + runtime { docker: dockerImage cpu: threads From 3540b4a12a2b7d56249f2d20941a6526af9c8f6e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:19:14 +0100 Subject: [PATCH 318/439] Add memory to modkit --- modkit.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 9f311121..96f92c41 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -37,6 +37,7 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 4 + String memory = "16GiB" } @@ -64,6 +65,7 @@ task Pileup { runtime { docker: dockerImage cpu: threads + memory: memory } } \ No newline at end of file From bc179875e1cf04fcd4efc63338b73d1230e3ef96 Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Wed, 29 Jan 2025 14:23:21 +0100 Subject: [PATCH 319/439] Add missing backslash --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 96f92c41..4f8bceb4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -48,7 +48,7 @@ task Pileup { modkit pileup \ --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ - ~{"--include-bed " + includeBed} + ~{"--include-bed " + includeBed} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ From c69c5cb2031913669dba5bf2cfe1acc4b00fed95 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 15:44:00 +0100 Subject: [PATCH 320/439] Set rather high defaults for time and memory for modkit --- modkit.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 4f8bceb4..d827d896 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -36,8 +36,9 @@ task Pileup { Boolean combineMods = false String logFilePath = "modkit.log" - Int threads = 4 - String memory = "16GiB" + Int threads = 8 + String memory = "48GiB" + Int timeMinutes = 4320 # 3 Days } @@ -66,6 +67,6 @@ task Pileup { docker: dockerImage cpu: threads memory: memory - + time_minutes: timeMinutes } } \ No newline at end of file From beec409c6e2ce345d6976f159d7da73b79110fe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 16:35:59 +0100 Subject: [PATCH 321/439] Upgrade sequali memory --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index ed6e5d40..664fc082 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -46,7 +46,7 @@ task Sequali { runtime { cpu: threads - memory: "2GiB" + memory: "4GiB" docker: dockerImage time_minutes: 59 } From a87956ed26298c48b29f23782dc268f8d8bf29ff Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 15:10:51 +0100 Subject: [PATCH 322/439] Add modkit flags --- modkit.wdl | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index d827d896..35d3c7fc 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -34,6 +34,9 @@ task Pileup { Boolean cpg = false Boolean combineMods = false + Boolean combineStrands = false + Boolean bedgraph = false + String? ignore String logFilePath = "modkit.log" Int threads = 8 @@ -50,9 +53,12 @@ task Pileup { --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ ~{"--include-bed " + includeBed} \ + ~{"--ignore " + ignore} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ + ~{true="--combine-strands" false="" combineStrands} \ + ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ ~{outputBed} From 730a8a7672b491ccac1dbfdab497a9420ac40f71 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 16:12:37 +0100 Subject: [PATCH 323/439] Capture multiple output files --- modkit.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 35d3c7fc..1cac1bd1 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -65,7 +65,8 @@ task Pileup { >>> output { - File out = outputBed + File? 
out = outputBed # Normal mode + Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode File logFile = logFilePath } From ed50e2dfb30a8f354f4e0dd2a4f7ae5aeec952fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Feb 2025 17:01:46 +0100 Subject: [PATCH 324/439] Update documentation for new tasks --- clair3.wdl | 22 ++++++++++++++++++++++ modkit.wdl | 30 +++++++++++++++++++++++++++++- sequali.wdl | 25 ++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index db2c2fb5..709d59b5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -69,4 +69,26 @@ task Clair3 { docker: dockerImage } + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPrefix: {description: "The output prefix where the data should be placed.", category: "common"} + modelTar: {description: "The TAR file with the model", category: "common"} + builtinModel: {description: "The builtin model name (in case a tar file is not used)", category: "common"} + sampleName: {description: "The name of the sample in the VCF", category: "common"} + platform: {description: "platform setting for clair3.", category: "required"} + includeAllCtgs: {description: "whether or not to call all contigs in the reference", category: "advanced"} + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + vcf: {description: "Output VCF file."} + vcfIndex: {description: "Output VCF index."} + + } } \ No newline at end of file diff --git a/modkit.wdl b/modkit.wdl index 1cac1bd1..382bfc09 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -22,7 +22,6 @@ version 1.0 task Pileup { input { - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" File bam File bamIndex String outputBed = "output.bed" @@ -42,6 +41,7 @@ task Pileup { Int threads = 8 String memory = "48GiB" Int timeMinutes = 4320 # 3 Days + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } @@ -76,4 +76,32 @@ task Pileup { memory: memory time_minutes: timeMinutes } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputBed: {description: "The output name where the data should be placed.", category: "common"} + + intervalSize: {description: "Sets the interval size", category: "advanced"} + includeBed: {description: "Bed file with regions to include", category: "advanced"} + cpg: {description: "Whether to call only at cpg sites", category: "advanced"} + combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} + combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} + bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} + ignore: {description: "Modification type to ignore. 
For example 'h'.", category: "advanced"} + logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + out: {description: "The output bed files. Not available when bedgraph = true."} + outFiles: {description: "Output files when bedgraph = true."} + logFile: {description: "The generated log file."} + } } \ No newline at end of file diff --git a/sequali.wdl b/sequali.wdl index 664fc082..cbd3d869 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -24,9 +24,12 @@ task Sequali { input { File reads File? mate_reads - Int threads = 2 String outDir = "." + + Int threads = 2 + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + Int timeMinutes = 59 } command <<< @@ -46,8 +49,24 @@ task Sequali { runtime { cpu: threads - memory: "4GiB" + memory: memory docker: dockerImage - time_minutes: 59 + time_minutes: timeMinutes + } + parameter_meta { + # inputs + reads: {description: "A FASTQ or BAM file.", category: "required"} + mate_reads: {description: "FASTQ mate file"} + threads: {description: "The number of cores to use.", category: "advanced"} + + outDir: {description: "The path to write the output to.", catgory: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + html: {description: "HTML report file."} + json: {description: "JSON report file for use with MultiQC."} } } \ No newline at end of file From 113d4c58930aa2fcde99eed5b018bb8061e612cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 09:13:34 +0100 Subject: [PATCH 325/439] Update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..97a1d016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ Add Sequali task. ++ Add Clair3 task. ++ Add Modkit task. ++ Modify minimap2 task to accept ubam input, including transfer of methylation + tags. Also sort the BAM output file by coordinate. ++ Update DeepVariant container and update resource requirements. + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From a01b54a0b79a135b3ddf319f71e51d1ef06f0f56 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:00:01 +0100 Subject: [PATCH 326/439] Indent clair3 command --- clair3.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 709d59b5..4d9092f2 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -41,20 +41,20 @@ task Clair3 { String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< - set -e - ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } - mkdir -p $(dirname ~{outputPrefix}) - run_clair3.sh \ - --model=~{modelArg} \ - --ref_fn=~{referenceFasta} \ - --bam_fn=~{bam} \ - --output=out \ - --threads=~{threads} \ - --platform=~{platform} \ - ~{"--sample_name=" + sampleName} \ - ~{true="--include_all_ctgs" false ="" includeAllCtgs} - mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi + set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } + mkdir -p $(dirname ~{outputPrefix}) + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{referenceFasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> output { From b409ca9ed22505252a4ddf8f451eb9b55be530f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:07 +0100 Subject: [PATCH 327/439] More realistic resource requirements for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 382bfc09..92905f06 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -39,8 +39,8 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 8 - String memory = "48GiB" - Int 
timeMinutes = 4320 # 3 Days + String memory = "4GiB" + Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From bdab5a4c0d0e8474bea79435cc128e50fe5109d2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:27 +0100 Subject: [PATCH 328/439] More specific bed file naming --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 92905f06..23269bf3 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.bed" + String outputBed = "output.methyl.bed" File referenceFasta File referenceFastaFai From c79ebd4affcc6524e671da9d6d63f98c9d3674c8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:13:54 +0100 Subject: [PATCH 329/439] Correct file extension for modkit --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 23269bf3..930b6de9 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.methyl.bed" + String outputBed = "output.bedMethyl" File referenceFasta File referenceFastaFai From 1580aae26fbec6b819d0a905959dbad7acf6fd63 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:14:19 +0100 Subject: [PATCH 330/439] Correct whitespacing Co-authored-by: Davy Cats --- sequali.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/sequali.wdl b/sequali.wdl index cbd3d869..b43cf281 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -53,6 +53,7 @@ task Sequali { docker: dockerImage time_minutes: timeMinutes } + parameter_meta { # inputs reads: {description: "A FASTQ or BAM file.", category: "required"} From 63dceb22e11e16a45f8ac04f1c466100e8a263f6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Feb 2025 16:24:21 +0100 Subject: [PATCH 331/439] Start on a VEP task --- vep.wdl | 74 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vep.wdl diff --git a/vep.wdl b/vep.wdl new file mode 100644 index 00000000..83eeac4e --- /dev/null +++ b/vep.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Vep { + input { + File inputFile + String outputPath = "vep.annotated.vcf.gz" + File cacheTar + File? pluginsTar + String? 
species + Array[String] plugins = [] + Boolean refseq = false + Boolean merged = false + + Boolean everything = false + Boolean symbol = false + + } + + command <<< + set -e + mkdir vep_cache + tar -x --directory vep_cache -f ~{cacheTar} + ~{"tar -x --directory vep_cache -f " + pluginsTar} + + # Output all stats files by default for MultiQC integration + vep \ + --input_file ~{inputFile} \ + ~{"--species " + species} \ + --stats_html --stats_text \ + --dir vep_cache \ # Output all stats files by default for MultiQC integration + + --offline \ + ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + --vcf \ + --compress-output bgzip \ + ~{true="--refseq" false="" refseq} \ + ~{true="--merged" false="" merged} \ + \ + ~{true="--everything" false="" everything} \ + ~{true="--symbol" false="" symbol} \ + + + # Cleanup the tar extract to save filesystem space + rm -rf vep_cache + + + >>> + + output { + File outputFile = outputPath + File statsHtml = outputPath + "_summary.html" + } + +} \ No newline at end of file From 405395d512611775ed38021d79b3f4f570d0f23e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 14:31:23 +0100 Subject: [PATCH 332/439] Add runtime requirements --- vep.wdl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 83eeac4e..496a6b8f 100644 --- a/vep.wdl +++ b/vep.wdl @@ -34,7 +34,10 @@ task Vep { Boolean everything = false Boolean symbol = false - } + String memory = "8GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" + } command <<< set -e @@ -71,4 +74,15 @@ task Vep { File statsHtml = outputPath + "_summary.html" } -} \ No newline at end of file + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: 
{description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From b6107be5cdfaf396e53f25f2d93b6220d1f14eb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:06:54 +0100 Subject: [PATCH 333/439] Take into account cache tar size for runtime --- vep.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 496a6b8f..4cec3fa3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -35,7 +35,8 @@ task Vep { Boolean symbol = false String memory = "8GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + # Account time for unpacking the cache. + Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 5401a6050c9c288f20569b1ffb943f1a05b19d19 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:20:41 +0100 Subject: [PATCH 334/439] Cleanup command --- vep.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vep.wdl b/vep.wdl index 4cec3fa3..f9e7a4a0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } @@ -51,8 +51,7 @@ task Vep { --input_file ~{inputFile} \ ~{"--species " + species} \ --stats_html --stats_text \ - --dir vep_cache \ # Output all stats files by default for MultiQC integration - + --dir vep_cache \ --offline \ ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ --vcf \ From 701b819d7bebab81385dbd3c159f31ab37e5961b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:41:20 +0100 Subject: [PATCH 335/439] Add missing ~ --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index f9e7a4a0..636a8ce0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -53,7 +53,7 @@ task Vep { --stats_html --stats_text \ --dir vep_cache \ --offline \ - ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ --compress-output bgzip \ ~{true="--refseq" false="" refseq} \ From e4654bc7be895cdf5fc80c02fdbfb84b8941d2aa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:51:53 +0100 Subject: [PATCH 336/439] properly format commandline option --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 636a8ce0..626257a3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -55,7 +55,7 @@ task Vep { --offline \ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ - --compress-output bgzip \ + --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ \ From bda5ff43ad460a51adcfa9daeb3432ec2156c80d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:21:23 +0100 Subject: [PATCH 337/439] Fix trailing whitespace --- 
vep.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 626257a3..f2ca4a6e 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,7 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ - ~{"--species " + species} \ + ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ --offline \ @@ -58,7 +58,6 @@ task Vep { --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ - \ ~{true="--everything" false="" everything} \ ~{true="--symbol" false="" symbol} \ From 967934c2fd0a4a4f29e4ad87475cd9c68a22298a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:39:43 +0100 Subject: [PATCH 338/439] Add missing output file param --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index f2ca4a6e..064cf41a 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,6 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ + --output_file ~{outputPath} \ ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ @@ -71,6 +72,7 @@ task Vep { output { File outputFile = outputPath File statsHtml = outputPath + "_summary.html" + File statsTxt = outputPath + "_summary.txt" } runtime { From 115f3cfc0da031309a42a5a02d0825a06e1d3e85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 17:03:00 +0100 Subject: [PATCH 339/439] Make sure output directory is made --- vep.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/vep.wdl b/vep.wdl index 064cf41a..7fb6a660 100644 --- a/vep.wdl +++ b/vep.wdl @@ -43,6 +43,7 @@ task Vep { command <<< set -e mkdir vep_cache + mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} From f29492641550c6d2247a40d216d53c5030d7983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:22:49 +0100 
Subject: [PATCH 340/439] Complete VEP task --- vep.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/vep.wdl b/vep.wdl index 7fb6a660..8a5a443b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -41,12 +41,14 @@ task Vep { } command <<< - set -e + set -eu mkdir vep_cache mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} + # Make sure vep can error, so the removal always succeeds. + set +e # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ @@ -61,13 +63,14 @@ task Vep { ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ ~{true="--everything" false="" everything} \ - ~{true="--symbol" false="" symbol} \ - + ~{true="--symbol" false="" symbol} + VEP_EXIT_CODE=$? + set -e # Cleanup the tar extract to save filesystem space rm -rf vep_cache - + exit $VEP_EXIT_CODE >>> output { @@ -83,8 +86,23 @@ task Vep { } parameter_meta { + # input + inputFile: {description: "The VCF to annotate.", category: "required"} + outputPath: {description: "Where to put the output file", category: "advanced"} + cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + refseq: {description: "Use the refseq cache", category: "common"} + merged: {description: "Use the merged cache", category: "common"} + everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} + symbol: {description: "Add the gene symbol to the output where available", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputFile: {description: "The annotated VEP VCF file."} + statsHtml: {description: "The VEP summary stats HTML file."} + statsTxt: {description: "The VEP summary stats TXT file."} } } From eca4681a0baf841dc2fffc2ca3f22930822740a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:45:48 +0100 Subject: [PATCH 341/439] Add VEP to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1276efaa..378731bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add VEP task. + Add Sequali task. + Add Clair3 task. + Add Modkit task. From 203d178e3ea80abef927e7f1ac67d00fec93ff75 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:15:50 +0100 Subject: [PATCH 342/439] Add missing parameter_meta for VEP --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index 8a5a443b..349242fb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -91,6 +91,8 @@ task Vep { outputPath: {description: "Where to put the output file", category: "advanced"} cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work.", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + species: {description: "Which species cache to use", category: "common"} + plugins: {description: "Which plugins to use", category: "common"} refseq: {description: "Use the refseq cache", category: "common"} merged: {description: "Use the merged cache", category: "common"} everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} From 117e5317fbb50c5989b1afd668d469569b78127e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:20:15 +0100 Subject: [PATCH 343/439] Add missing Minimap2 parameter_meta --- minimap2.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 18127cb1..da301bd3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -183,6 +183,11 @@ task Mapping { mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} + additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + nameSorted: {description: "Output a name sorted file instead", category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 319501e7ebbc0fa76baaac1d48d56294eda4b86c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:21:25 +0100 Subject: [PATCH 344/439] Add a samtools split task --- samtools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..a82bbda1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -514,6 +514,61 @@ task Sort { } } +task Split { + input { + File inputBam + Directory outputPath + String? unaccountedPath + String? filenameFormat = "%!.%." + String? outputFormat = "bam" + Boolean writeIndex = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + mkdir -p "~{outputPath}" + samtools split \ + --output-fmt ~{outputFormat} \ + -f "~{outputPath}/rg/~{filenameFormat}" \ + ~{"-u " + unaccountedPath} \ + ~{true="--write-index" false="" writeIndex} \ + ~{inputBam} + } + + output { + Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + File? 
unaccounted = unaccountedPath + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The bam file to split.", category: "required"} + outputPath: {description: "Directory to store output bams", category: "required"} + + # Optional parameters + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} + writeIndex: {description: "Automatically index outputs", category: "indexing"} + + # outputs + split: {description: "BAM file split by read groups"} + unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} + } +} + task Tabix { input { File inputFile From 60dcef74f6229d81d19436a361f3e4e6aa41ddd0 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:22:35 +0100 Subject: [PATCH 345/439] Register in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..2993ddc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ New samtools task: split. 
version 5.2.0 --------------------------- From 4030091ee212be3cc040c69a61834684b8c8be0e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:27:49 +0100 Subject: [PATCH 346/439] Directory not yet available --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index a82bbda1..51230097 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -517,7 +517,7 @@ task Sort { task Split { input { File inputBam - Directory outputPath + String outputPath String? unaccountedPath String? filenameFormat = "%!.%." String? outputFormat = "bam" From 8a0de277c0b69a7607757a0c8c102a379e8e444c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:28:19 +0100 Subject: [PATCH 347/439] Must be defined --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 51230097..a2be09a4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -519,8 +519,8 @@ task Split { File inputBam String outputPath String? unaccountedPath - String? filenameFormat = "%!.%." - String? outputFormat = "bam" + String filenameFormat = "%!.%." 
+ String outputFormat = "bam" Boolean writeIndex = false Int threads = 1 From b70891c3aea7314777aaf5122de3beadf10965e3 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 12:27:17 +0100 Subject: [PATCH 348/439] noticed in wdl-aid that only these are permitted --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index a2be09a4..2fe9a9f7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -558,10 +558,10 @@ task Split { outputPath: {description: "Directory to store output bams", category: "required"} # Optional parameters - unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} - filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} - writeIndex: {description: "Automatically index outputs", category: "indexing"} + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} + writeIndex: {description: "Automatically index outputs", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 1ec88558c5b21cb1362518b2c4af95a865abcc68 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:26 +0100 Subject: [PATCH 349/439] Add compression level parameter, defaulting to 1 --- samtools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 2fe9a9f7..c46ea88b 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -523,6 +523,8 @@ task Split { String outputFormat = "bam" Boolean writeIndex = false + Int compressionLevel = 1 + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) @@ -534,6 +536,7 @@ task Split { mkdir -p "~{outputPath}" samtools split \ --output-fmt ~{outputFormat} \ + --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ ~{true="--write-index" false="" writeIndex} \ @@ -562,6 +565,7 @@ task Split { filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 153db04100bf78f07b898d523a6da84544d8a02b Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:37 +0100 Subject: [PATCH 350/439] default to indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c46ea88b..554d0903 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -521,7 +521,7 @@ task Split { String? unaccountedPath String filenameFormat = "%!.%." String outputFormat = "bam" - Boolean writeIndex = false + Boolean writeIndex = true Int compressionLevel = 1 From 1522785ae1cec9254e5bf57f942260eab2babfd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:33 +0100 Subject: [PATCH 351/439] Remove control of output format --- samtools.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 554d0903..7eba529c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - String outputFormat = "bam" Boolean writeIndex = true Int compressionLevel = 1 @@ -535,7 +534,7 @@ task Split { set -e mkdir -p "~{outputPath}" samtools split \ - --output-fmt ~{outputFormat} \ + --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ @@ -544,7 +543,7 @@ task Split { } output { - Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + Array[File] splitBam = glob(outputPath + "/rg/*.bam") File? 
unaccounted = unaccountedPath } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "common"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} From 2bba90e99bbc61dc08905a569d8bbb3df285878a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:42 +0100 Subject: [PATCH 352/439] include indexes --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7eba529c..bfed7560 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -544,6 +544,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") File? 
unaccounted = unaccountedPath } @@ -566,7 +567,8 @@ task Split { compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs - split: {description: "BAM file split by read groups"} + splitBam: {description: "BAM file split by read groups"} + splitBamIndex: {description: "BAM indexes"} unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} } } From bd4a8567cdedabf6aa1e779fa1af731b09e64b49 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:19:02 +0100 Subject: [PATCH 353/439] write index is non-optional --- samtools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index bfed7560..1660aac3 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - Boolean writeIndex = true Int compressionLevel = 1 @@ -538,7 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - ~{true="--write-index" false="" writeIndex} \ + --write-index \ ~{inputBam} } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} - writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs From be0aabe03a8615dad5190b5e4c4c9869bb472c2e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:49:15 +0100 Subject: [PATCH 354/439] make subdirectory as well --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1660aac3..c452664c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -531,7 +531,7 @@ task Split { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "~{outputPath}/rg/" samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 10e83c1c116d55d148534c7f9fc56056773aadb7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:03:06 +0100 Subject: [PATCH 355/439] emits csi extension instead --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c452664c..191a99a2 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -543,7 +543,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") - Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bam.csi") File? 
unaccounted = unaccountedPath } From 6ebf7cd161f15add1c8ed9af8f000ab0952d232c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:14:42 +0100 Subject: [PATCH 356/439] missing threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 191a99a2..19ad8dab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -537,6 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ + --threads ~{threads} \ --write-index \ ~{inputBam} } From 6f9350106827f108f7be38b0d0440a0243174664 Mon Sep 17 00:00:00 2001 From: Helena Date: Mon, 10 Mar 2025 14:00:24 +0100 Subject: [PATCH 357/439] Update samtools.wdl --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..66dc647f 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -167,7 +167,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } From 0ff8d9891a82ff8daf784b782d5007b4ed5cdd16 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 19 Mar 2025 18:33:55 +0100 Subject: [PATCH 358/439] Add link to mentioned VEP website to save time in future --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 349242fb..e99c9fdb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -89,7 +89,7 @@ task Vep { # input inputFile: {description: "The VCF to annotate.", category: "required"} outputPath: {description: "Where to put the output file", category: "advanced"} - cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html)", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} species: {description: "Which species cache to use", category: "common"} plugins: {description: "Which plugins to use", category: "common"} From 3ea61f0d2fe6f16eba1afde9255c15bc368975dd Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:56:01 +0100 Subject: [PATCH 359/439] Add a samtools quickcheck task which returns the input bam. This is designed to enable us to more quickly catch problematic BAMs, and fail earlier in the pipeline than after we've wasted some significant compute time. --- CHANGELOG.md | 1 + samtools.wdl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..a41b47cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Added `samtools.Quickcheck` to allow failing on truncated files early. 
version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..ea615bae 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -452,6 +452,46 @@ task Merge { } } +task Quickcheck { + input { + File inputBam + + Int threads = 1 + Int memoryGb = 1 + Int timeMinutes = 1 + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + samtools quickcheck ~{inputBam} + } + + output { + File outputBam = inputBam + } + + runtime { + cpu: threads + memory: "~{memoryGb}GiB" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} + + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "The exact same input file, but use this so it is recognised as a dependent task."} + } +} + task Sort { input { File inputBam From 38c5c9ad46e56e6c6e04853bc278e07c24221a28 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:20:20 +0100 Subject: [PATCH 360/439] Collate fastq file before splitting It was reported to me that the _R1/_R2 from `samtools fastq` were not collated properly, that a single read was appearing in two wildly different places in R1/R2 which is completely silly. 
I have tried to reproduce this but thus far have been unable to: $ samtools view -b FILE.bam chrM > tmp.bam $ du -h tmp.bam 560K tmp.bam $ samtools fastq -1 paired1.fq -2 paired2.fq -0 /dev/null -s /dev/null -n tmp.bam [M::bam2fq_mainloop] discarded 480 singletons [M::bam2fq_mainloop] processed 608 reads $ diff <(grep ^@D paired1.fq) <(grep ^@D paired2.fq) $ Note the complete lack of difference between ordering. But if we look at the output of files which have come out of this tool, there are clear differences: $ zless R1.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:2113:10647:9989 @D_____________________:2208:9374:82968 $ zless R2.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:1214:3361:56060 @D_____________________:1309:8329:98995 these were produced by the command $ set -e $ mkdir -p "$(dirname split/R1.fastq.gz)" $ samtools fastq \ -1 split/R1.fastq.gz \ -2 split/R2.fastq.gz \ -n \ --threads 1 \ /mnt/miniwdl/out.bam This is indeed documented behaviour however: > If the input contains read-pairs which are to be interleaved or > written to separate files in the same order, then the input should be > first collated by name. Use samtools collate or samtools sort -n to > ensure this. > > https://www.htslib.org/doc/samtools-fasta.html#DESCRIPTION So it makes some sense to collate, or at some point ensure that the BAMs are sorted. I think there is a discussion to be had over whether automatic collation is sensible or a waste of runtime, but on the other hand, this is maybe a small footgun and eliminating it would make sense to reduce the potential failure modes (given our focus on reducing risk and all.) 
--- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..abf77c00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..02a5ed52 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -174,6 +174,7 @@ task Fastq { command { set -e mkdir -p "$(dirname ~{outputRead1})" + samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ @@ -184,8 +185,7 @@ task Fastq { ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} \ - ~{inputBam} + ~{"--threads " + threads} } output { From 47efde79998bd64c25ef546e6387ff37254fa192 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 12:23:19 +0100 Subject: [PATCH 361/439] Hardcode runtime per feedback --- samtools.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index ea615bae..8bb2df87 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,9 +456,6 @@ task Quickcheck { input { File inputBam - Int threads = 1 - Int memoryGb = 1 - Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } @@ -472,9 +469,7 @@ task Quickcheck { } runtime { - cpu: threads - memory: "~{memoryGb}GiB" - time_minutes: timeMinutes + time_minutes: 5 docker: dockerImage } @@ -482,9 +477,6 @@ task 
Quickcheck { # inputs inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs From 9fd1c2cfb9431a31d48dab6eaadf9f14faf96326 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 14:13:59 +0100 Subject: [PATCH 362/439] do not use default cpu/mem --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 8bb2df87..a009500c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -469,7 +469,9 @@ task Quickcheck { } runtime { + cpu: 1 time_minutes: 5 + memory: "1GiB" docker: dockerImage } From d0cc47c6421d990b2f2ed18b6ef5476cd5a19dd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:46:10 +0100 Subject: [PATCH 363/439] Add wa/wb/s flags to bedtools intersect Fix bug whereby missing outdir would cause a failure. --- CHANGELOG.md | 1 + bedtools.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..4bd6ae1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
version 5.2.0 --------------------------- diff --git a/bedtools.wdl b/bedtools.wdl index fe18ede6..a5d8aab3 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -267,6 +267,10 @@ task Intersect { File? faidx # Giving a faidx file will set the sorted option. + Boolean writeA = false + Boolean writeB = false + Boolean stranded = false + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -276,10 +280,14 @@ task Intersect { command { set -e + mkdir -p "$(dirname ~{outputBed})" ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} bedtools intersect \ -a ~{regionsA} \ -b ~{regionsB} \ + ~{true="-wa" false="" writeA} \ + ~{true="-wb" false="" writeB} \ + ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} @@ -301,6 +309,11 @@ task Intersect { regionsB: {description: "Region file b to intersect.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} + + writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} + writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} + stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From fff0fe8fe9cf1f022369dcfb05e5f4980f0f8115 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 08:52:51 +0200 Subject: [PATCH 364/439] Update pbmm2 image --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index ea7c05df..91b0b1fe 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -31,7 +31,7 @@ task Mapping { Int cores = 4 String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) - String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } command { From 084486c19bcde6398d41381c0628f5c359c7c53b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 09:05:59 +0200 Subject: [PATCH 365/439] Add pbmm2 outputPrefix parameter --- CHANGELOG.md | 2 ++ pbmm2.wdl | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dde73d44..dd536e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Allow pbmm2 to work with a set output prefix for the BAM file. ++ Update pbmm2 docker container to version 1.17 + Add VEP task. + Add Sequali task. + Add Clair3 task. 
diff --git a/pbmm2.wdl b/pbmm2.wdl index 91b0b1fe..915fbb02 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,6 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample + String outputPrefix = sample + ".align" File referenceMMI File queryFile @@ -35,6 +36,8 @@ task Mapping { } command { + set -e + mkdir -p ~{outputPrefix} pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -42,12 +45,12 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ - ~{sample}.align.bam + ~{outputPrefix}.bam } output { - File outputAlignmentFile = sample + ".align.bam" - File outputIndexFile = sample + ".align.bam.bai" + File outputAlignmentFile = outputPrefix + ".bam" + File outputIndexFile = outputPrefix + ".bam.bai" } runtime { @@ -62,6 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -69,7 +73,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # outputs + # output outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } From 912754990f49d74b69a170bf68901e6ecd1f9557 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:02 +0200 Subject: [PATCH 366/439] Use a better output prefix Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 915fbb02..f8abbd64 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,7 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample - String outputPrefix = sample + ".align" + String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile From 408757f683bf02d0bcf214cd72a4aee732d520d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:16 +0200 Subject: [PATCH 367/439] Add missing dirname call Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index f8abbd64..b00e249e 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -37,7 +37,7 @@ task Mapping { command { set -e - mkdir -p ~{outputPrefix} + mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ From 8e008554a71cb5de37c69f80321b0d4d39dcf750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:58:13 +0200 Subject: [PATCH 368/439] Add missing comma --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index b00e249e..73e74c0c 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -65,7 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} - 
outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 7d6da07cd4dbe09e42cf343e9077d0118e4d1264 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:48:03 +0100 Subject: [PATCH 369/439] Deprecated bedgraph option, produce it by default --- CHANGELOG.md | 1 + modkit.wdl | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..337a68db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. ++ Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..7376a567 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,6 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" + String outputBedGraph = "m_CG0_combined.bedgraph" File referenceFasta File referenceFastaFai @@ -34,7 +35,6 @@ task Pileup { Boolean cpg = false Boolean combineMods = false Boolean combineStrands = false - Boolean bedgraph = false String? 
ignore String logFilePath = "modkit.log" @@ -42,7 +42,6 @@ task Pileup { String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" - } command <<< @@ -58,15 +57,17 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ - ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ - ~{outputBed} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> + # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. + # https://github.com/nanoporetech/modkit/issues/210#issuecomment-2181706374 + output { - File? out = outputBed # Normal mode - Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode + File out = outputBed # Normal mode + File outFiles = outputBedGraph # Bedgraph mode File logFile = logFilePath } @@ -104,4 +105,4 @@ task Pileup { outFiles: {description: "Output files when bedgraph = true."} logFile: {description: "The generated log file."} } -} \ No newline at end of file +} From 9d2a4735bf221410b7a1b6b3ad1cd5e5edad3423 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:33:52 +0100 Subject: [PATCH 370/439] Update parameter_meta for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 7376a567..5ba1f501 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -84,14 +84,14 @@ task Pileup { bamIndex: {description: "The index for the input alignment file", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputBed: {description: "The output name where the data 
should be placed.", category: "common"} + outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} + outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to include", category: "advanced"} cpg: {description: "Whether to call only at cpg sites", category: "advanced"} combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} - bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} From feaacf40fb1fb2edf4588d63b5baee4f8eac18a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:37:39 +0100 Subject: [PATCH 371/439] Fix typo --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 5ba1f501..9311e4da 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -85,7 +85,7 @@ task Pileup { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} - outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} + outputBedGraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to 
include", category: "advanced"} From 9e057d6ce259e5fc96ffb04208c37bda8b43ec3e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 31 Mar 2025 14:04:15 +0200 Subject: [PATCH 372/439] split into separate files --- modkit.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 9311e4da..78df28f4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,7 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" - String outputBedGraph = "m_CG0_combined.bedgraph" + String outputBedGraph = "combined.bedgraph" File referenceFasta File referenceFastaFai @@ -59,7 +59,9 @@ task Pileup { ~{true="--combine-strands" false="" combineStrands} \ --log-filepath ~{logFilePath} \ ~{bam} \ - - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' + # Separately generate the combined file as well, so users can have a choice. + cat ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. 
@@ -67,7 +69,8 @@ task Pileup { output { File out = outputBed # Normal mode - File outFiles = outputBedGraph # Bedgraph mode + File outGraph = outputBedGraph # Normal mode + Array[File] outFiles = glob(outputBedGraph + "*.bedGraph") # Bedgraph mode File logFile = logFilePath } From e439d58c8e9584c8957a4ecb265ce5f7de9f96ce Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 14:06:31 +0200 Subject: [PATCH 373/439] Add Mosdepth task --- CHANGELOG.md | 1 + mosdepth.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 mosdepth.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..986dfd13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/mosdepth.wdl b/mosdepth.wdl new file mode 100644 index 00000000..0f800769 --- /dev/null +++ b/mosdepth.wdl @@ -0,0 +1,106 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mosdepth { + input { + File bam + File bamIndex + String prefix = "./out" + + String? chrom + # --by flag takes a BED file or an integer. So there need to be two inputs in WDL's typed system. + File? byBed + Int? byWindow + File? fasta + Int? flag + Int? includeFlag + + Boolean noPerBase = false + Boolean d4 = false + Boolean fastMode = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 + String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" + } + + command <<< + set -e + mkdir -p $(dirname ~{prefix}) + mosdepth \ + --threads ~{threads} \ + ~{"--chrom " + chrom} \ + ~{"--by " + byBed} \ + ~{"--by " + byWindow} \ + ~{"--fasta " + fasta} \ + ~{true="--no-per-base" false="" noPerBase} \ + ~{true="--d4" false="" d4} \ + ~{"--flag " + flag} \ + ~{"--include-flag " + includeFlag} \ + ~{true="--fast-mode" false="" fastMode} \ + ~{prefix} ~{bam} + >>> + + output { + File globalDist = "~{prefix}.mosdepth.global.dist.txt" + File summary = "~{prefix}.mosdepth.summary.txt" + File? perBaseBed = "~{prefix}.per-base.bed.gz" + File? 
regionsBed = "~{prefix}.regions.bed.gz" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "Input BAM or CRAM file.", category: "required"} + bamIndex: {description: "Index for the input BAM or CRAM file.", category: "required"} + prefix: {description: "Output prefix.", category: "common"} + + chrom: {description: "Chromosome to restrict depth calculation.", category: "advanced"} + byBed: {description: "Bed file with windows to include for the --by flag. Should not be used together with byWindow.", category: "common"} + byWindow: {description: "Integer window size for the --by flag. Should not be used together with byBed.", category: "advanced"} + fasta: {description: "FASTA file, only necessary when CRAM input is used.", category: "advanced"} + flag: {description: "Exclude reads with any of the bits in FLAG set.", category: "advanced"} + includeFlag: {description: "Only include reads with any of the bits in FLAG set.", category: "advanced"} + + noPerBase: {description: "Don't output per-base depth. Skipping this output will speed execution.", category: "common"} + d4: {description: "output per-base depth in d4 format.", category: "advanced"} + fastMode: {description: "Don't look at internal cigar operations or correct mate overlaps (recommended for most use-cases).", category: "common"} + + threads: {description: "How many threads to use.", category: "common"} + memory: {description: "How much memory to allocate.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + globalDist: {description: "Global distribution table file."} + summary: {description: "Summary table file."} + perBaseBed: {description: "Per base coverage BED file."} + regionsBed: {description: "Per region BED file, if byBed or byWindow is used."} + } +} \ No newline at end of file From 7bcac8ea2636cbeeae247d783c0dc5558bb0955a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:22:31 +0200 Subject: [PATCH 374/439] Update all samtools images --- samtools.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index d724a692..2388813e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 
@@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -456,7 +456,7 @@ task Quickcheck { input { File inputBam - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -497,7 +497,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -560,7 +560,7 @@ task Split { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -669,7 +669,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputIndexPath = basename(outputFileName) + ".bai" From 435a719147253df23cad2674736d8d699b186e77 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:56:35 +0200 Subject: [PATCH 375/439] Task updates to samtools.wdl --- CHANGELOG.md | 6 +++++ samtools.wdl | 72 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..8b95b904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update docker images in samtools.wdl ++ Add threads and compression levels to applicable tasks. Default to + compression level 1. ++ samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is + the name of the flag. ++ Unused javaXmx parameter removed from samtools DictAndFaidx + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. 
+ Update pbmm2 docker container to version 1.17 diff --git a/samtools.wdl b/samtools.wdl index 2388813e..30e938b4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -24,11 +24,13 @@ task BgzipAndIndex { input { File inputFile String outputDir - String type = "vcf" + String preset = "vcf" + Int compressLevel = 1 + Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } String outputGz = outputDir + "/" + basename(inputFile) + ".gz" @@ -36,8 +38,15 @@ task BgzipAndIndex { command { set -e mkdir -p "$(dirname ~{outputGz})" - bgzip -c ~{inputFile} > ~{outputGz} - tabix ~{outputGz} -p ~{type} + bgzip \ + --threads ~{threads} \ + --compress-level ~{compressLevel} \ + -c ~{inputFile} > ~{outputGz} + + tabix \ + --preset ~{preset} \ + --threads ~{threads - 1} \ + ~{outputGz} } output { @@ -46,6 +55,7 @@ task BgzipAndIndex { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -55,7 +65,7 @@ task BgzipAndIndex { # inputs inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} - type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -69,7 +79,6 @@ task BgzipAndIndex { task DictAndFaidx { input { File inputFile - String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -102,7 +111,6 @@ task DictAndFaidx { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -163,7 +171,7 @@ task Fastq { Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Int? compressionLevel + Int compressionLevel = 1 Int threads = 1 String memory = "1GiB" @@ -184,8 +192,8 @@ task Fastq { ~{"-G " + excludeSpecificFilter} \ ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} + -c ~{compressionLevel} \ + --threads ~{threads - 1} } output { @@ -276,6 +284,8 @@ task Flagstat { File inputBam String outputPath + Int threads = 1 + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -284,7 +294,9 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + samtools flagstat \ + --threads ~{threads - 1} \ + ~{inputBam} > ~{outputPath} } output { @@ -292,6 +304,7 @@ task Flagstat { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -316,6 +329,8 @@ task Index { String? outputBamPath + Int threads = 1 + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -334,7 +349,9 @@ task Index { mkdir -p "$(dirname ~{outputPath})" ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + --threads ~{threads -1} \ + ~{outputPath} ~{bamIndexPath} ' } @@ -344,6 +361,7 @@ task Index { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -367,6 +385,7 @@ task Markdup { input { File inputBam String outputBamPath + Int threads = 1 Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -375,7 +394,9 @@ task Markdup { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + samtools markdup \ + --threads ~{threads - 1} \ + ~{inputBam} ~{outputBamPath} } output { @@ -383,6 +404,7 @@ task Markdup { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } @@ -405,6 +427,10 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Boolean combineRGHeaders = false + Boolean combinePGHeaders = false + + Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) @@ -420,6 +446,9 @@ task Merge { samtools merge \ --threads ~{threads - 1} \ 
~{true="-f" false="" force} \ + -l ~{compressionLevel} \ + ~{true="-c" false="" combineRGHeaders} \ + ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -514,7 +543,7 @@ task Sort { -o ~{outputPath} \ ~{inputBam} samtools index \ - -@ ~{threads} \ + --threads ~{threads - 1} \ ~{outputPath} ~{bamIndexPath} } @@ -571,7 +600,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - --threads ~{threads} \ + --threads ~{threads - 1} \ --write-index \ ~{inputBam} } @@ -610,10 +639,10 @@ task Tabix { input { File inputFile String outputFilePath = basename(inputFile) - String type = "vcf" + String preset = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } # FIXME: It is better to do the indexing on VCF creation. @@ -625,7 +654,7 @@ task Tabix { then ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi - tabix ~{outputFilePath} -p ~{type} + tabix ~{outputFilePath} -p ~{preset} } output { @@ -643,7 +672,7 @@ task Tabix { # inputs inputFile: {description: "The file to be indexed.", category: "required"} outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} - type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -666,6 +695,8 @@ task View { Int? MAPQthreshold File? targetFile + Boolean fast = false # Default should be true, unless a non-BAM format is preferred. + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) @@ -682,11 +713,12 @@ task View { ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ ~{true="-u " false="" uncompressedBamOutput} \ + ~{true="--fast" false="" fast} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ - ~{"--threads " + (threads - 1)} \ + --threads ~{threads - 1} \ ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} From d20b313ea01c0dc3fe318206daac4d976c22bc5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:58:17 +0200 Subject: [PATCH 376/439] Increase mosdepth default memory --- mosdepth.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosdepth.wdl b/mosdepth.wdl index 0f800769..43e95614 100644 --- a/mosdepth.wdl +++ b/mosdepth.wdl @@ -39,7 +39,7 @@ task Mosdepth { Boolean fastMode = false Int threads = 1 - String memory = "1GiB" + String memory = "4GiB" Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" } From 046eecb3af6887d6aad1c31a4521951822683259 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:59:19 +0200 Subject: [PATCH 377/439] Allocate more time for merging --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 30e938b4..915bb848 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -433,7 +433,7 @@ task Merge { Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") 
* 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } From f5765ffd1e75964a43da36c500741610e005c554 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:20:18 +0200 Subject: [PATCH 378/439] Update clair3 image --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 4d9092f2..57984a32 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -35,7 +35,7 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" From bfd433dd4f698bf141c7add6cc42ea58d56ca3a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:25:45 +0200 Subject: [PATCH 379/439] Update deepvariant image --- deepvariant.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 2d212000..e9e6c18c 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -35,11 +35,11 @@ task RunDeepVariant { String? outputGVcfIndex File? regions String? sampleName - Boolean? 
VCFStatsReport = true + Boolean VCFStatsReport = true String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.6.1" + String dockerImage = "google/deepvariant:1.8.0" } command { From cfbc34deb566ddb2ce0561168c7fb3dd3b0ae1e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 11:30:12 +0200 Subject: [PATCH 380/439] Update several images --- CHANGELOG.md | 1 + modkit.wdl | 2 +- multiqc.wdl | 2 +- picard.wdl | 34 +++++++++++++++++----------------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b95b904..8c13cacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ version 6.0.0-dev + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. + Unused javaXmx parameter removed from samtools DictAndFaidx ++ Update Picard images + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..6a7d9b4d 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -41,7 +41,7 @@ task Pileup { Int threads = 8 String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a2e32cdb..fae52178 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/picard.wdl b/picard.wdl index 6628cf0e..fd072523 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -210,7 +210,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -336,7 +336,7 @@ task CollectRnaSeqMetrics { String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -394,7 +394,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -456,7 +456,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -569,7 +569,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -621,7 +621,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -684,7 +684,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -753,7 +753,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -836,7 +836,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -892,7 +892,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17GiB" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" File? 
noneFile } @@ -953,7 +953,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -996,7 +996,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1058,7 +1058,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } @@ -1108,7 +1108,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1163,7 +1163,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { From d31f74badd4e6d8f8c1f397c4478ffa20e32437e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:02:11 +0200 Subject: [PATCH 381/439] Make resource requirements for pbmm2 and minimap2 somewhat equal --- CHANGELOG.md | 2 ++ minimap2.wdl | 8 ++++---- pbmm2.wdl | 32 +++++++++++++++++++++++--------- 3 files changed, 29 insertions(+), 13 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 8c13cacc..5fa636d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ version 6.0.0-dev + Unused javaXmx parameter removed from samtools DictAndFaidx + Update Picard images + Add Mosdepth task. ++ pbmm2 loses the sort parameter. Output is now always sorted. ++ pbmm2 gets an unmapped parameter. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/minimap2.wdl b/minimap2.wdl index da301bd3..a7584beb 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -86,8 +86,6 @@ task Mapping { File queryFile Int compressionLevel = 1 - Int additionalSortThreads = 1 - Int sortMemoryGb = 1 Boolean nameSorted = false # MM, ML, MN -> Methylation flags # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. @@ -112,6 +110,8 @@ task Mapping { String? howToFindGTAG String? readgroup + Int sortThreads = 2 + Int sortMemoryGb = 1 Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) @@ -147,7 +147,7 @@ task Mapping { - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ - -@ ~{additionalSortThreads} \ + --threads ~{sortThreads - 1} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam @@ -184,7 +184,7 @@ task Mapping { tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} - additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} nameSorted: {description: "Output a name sorted file instead", category: "common"} diff --git a/pbmm2.wdl b/pbmm2.wdl index 73e74c0c..23133278 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -23,25 +23,36 @@ version 1.0 task Mapping { input { String presetOption - Boolean sort=true + Boolean unmapped = false String sample String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile - Int cores = 4 - String memory = "30GiB" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) + Int sortMemoryGb = 1 + Int sortThreads = 2 + Int cores = 8 + String memory = "24GiB" + # Slightly higher than minimap2 as compression level can not be set. + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } + # Use cores+sortThreads to set the number of threads. Internally pbmm2 + # allocates cores - sortThreads to alignment. This leads to underutilization + # of the requested resources. Sorting uses very little CPU until the point + # comes that the memory is full and the temporary file needs to be written. + # At this point the alignment halts because the pipe is full. 
command { set -e mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ - ~{true="--sort" false="" sort} \ - -j ~{cores} \ + --sort \ + ~{true="--unmapped" false="" unmapped} \ + --num-threads ~{cores + sortThreads} \ + --sort-memory ~{sortMemoryGb}G \ + --sort-threads ~{sortThreads} \ ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ @@ -63,15 +74,18 @@ task Mapping { parameter_meta { # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} - sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + unmapped: {description: "Include unmapped reads in the output.", category: "common"} + + sortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} # output outputAlignmentFile: {description: "Mapped bam file."} From 046947847255c3323524f1c92004a66ec026b7c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:21:51 +0200 Subject: [PATCH 382/439] Increase default thread count for samtools merge --- CHANGELOG.md | 1 + samtools.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fa636d8..0781e4b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks. Default to compression level 1. diff --git a/samtools.wdl b/samtools.wdl index 915bb848..7a2223f6 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -431,7 +431,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - Int threads = 1 + # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
+ Int threads = 8 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From b063b9ba79e41f3d20c64ded779a2953a1f7ec55 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:53:28 +0200 Subject: [PATCH 383/439] more time for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 57984a32..5a6154af 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,7 +34,7 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } From d502298c8ec0e594cace54e573e68b2e7a4d9041 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:54:27 +0200 Subject: [PATCH 384/439] Make sequali runtime dependent on input file size --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index b43cf281..cbca3653 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -29,7 +29,7 @@ task Sequali { Int threads = 2 String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" - Int timeMinutes = 59 + Int timeMinutes = 10 + ceil(size(reads, "GiB") + size(mate_reads, "GiB")) * 4 } command <<< From b942c7ed0a833c830aabb227a15d78ca89aecc3e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:58:12 +0200 Subject: [PATCH 385/439] Slightly higher requirements for pbmm2 than minimap2 --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 23133278..9155e7b2 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -32,7 +32,7 @@ task Mapping { Int sortMemoryGb = 1 Int sortThreads = 2 Int cores = 8 - String memory = "24GiB" + String memory = "30GiB" # 
Slightly higher than minimap2 as compression level can not be set. Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" From d2ac7b2ad030a00d83aa5a0100f79ec5e16dd5d1 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:20:33 +0200 Subject: [PATCH 386/439] Add filterThreshold, filterPercent to modkit pileup --- modkit.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 78df28f4..a611a620 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -31,6 +31,8 @@ task Pileup { Int? intervalSize File? includeBed + String? filterThreshold + String? filterPercentile Boolean cpg = false Boolean combineMods = false @@ -57,6 +59,8 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ + ~{"--filter-percentile " + filterPercentile} \ + ~{"--filter-threshold " + filterThreshold} \ --log-filepath ~{logFilePath} \ ~{bam} \ - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' @@ -97,12 +101,14 @@ task Pileup { combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + filterThreshold: {description: "Global filter threshold can be specified with by a decimal number (e.g. 0.75). 
Otherwise the automatic filter percentile will be used.", category: "advanced"} + filterPercentile: {description: "This defaults to 0.1, to remove the lowest 10% confidence modification calls, but can be manually adjusted", category: "advanced"} threads: {description: "The number of threads to use for variant calling.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + # output out: {description: "The output bed files. Not available when bedgraph = true."} outFiles: {description: "Output files when bedgraph = true."} From 204821385c3d176c3425d7052b6f3905ff46541d Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:21:11 +0200 Subject: [PATCH 387/439] Add a summary task --- CHANGELOG.md | 2 ++ modkit.wdl | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed79b5b..57519f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ version 6.0.0-dev + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. ++ Add support for filterThreshold/filterPercentile for `modkit.Pileup`. ++ Add `modkit.Summary` task.
version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a611a620..7546458a 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -115,3 +115,65 @@ task Pileup { logFile: {description: "The generated log file."} } } + +task Summary { + input { + File bam + File bamIndex + + String summary = "modkit.summary.txt" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? seed + + Int threads = 4 + String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + Int timeMinutes = 2880 / threads # 2 Days / threads + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p $(dirname ~{summary}) + + modkit summary \ + --threads ~{threads} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + ~{bam} > ~{summary} + >>> + + output { + File summaryReport = summary # Normal mode + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", 
category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From a9ec6faf3de64e110209ed2c81b1272e765a6247 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:24:46 +0200 Subject: [PATCH 388/439] Downgrade deepvariant because of a bug --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index e9e6c18c..c700416f 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -39,7 +39,9 @@ task RunDeepVariant { String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.8.0" + # Version 1.8.0 has a bug. + # https://github.com/google/deepvariant/issues/912 + String dockerImage = "google/deepvariant:1.6.1" } command { From 741f9708383ff29d0f6f548f9fffad0b8eb7ab37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:26:56 +0200 Subject: [PATCH 389/439] Increase time limit for VEP --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index e99c9fdb..2c1f923b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 15) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 4fe49b8ef3f1bae978b2fa07ac6e08a282e2f91f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 09:56:43 +0200 Subject: [PATCH 390/439] Update samtools parameter_meta --- CHANGELOG.md | 2 +- samtools.wdl | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0781e4b1..1180578a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ version 6.0.0-dev --------------------------- + Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl -+ Add threads and compression levels to applicable tasks. Default to ++ Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. diff --git a/samtools.wdl b/samtools.wdl index 7a2223f6..cd24e6e9 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,6 +69,8 @@ task BgzipAndIndex { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressLevel: {description: "Set compression level.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs compressed: {description: "Compressed input file."} @@ -317,6 +319,7 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs flagstat: {description: "The number of alignments for each FLAG type."} @@ -374,6 +377,7 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs indexedBam: {description: "BAM file that was indexed."} @@ -415,6 +419,7 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs outputBam: {description: "BAM file with duplicate alignments marked."} @@ -471,6 +476,10 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + + combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} + combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -567,7 +576,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -696,7 +705,7 @@ task View { Int? MAPQthreshold File? targetFile - Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Boolean fast = true # Sets compression level to 1. Int threads = 1 String memory = "1GiB" @@ -707,14 +716,15 @@ task View { String outputIndexPath = basename(outputFileName) + ".bai" # Always output to bam and output header. + # -u should be after --fast, and will override it in that case. command { set -e mkdir -p "$(dirname ~{outputFileName})" samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ ~{true="--fast" false="" fast} \ + ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -741,6 +751,7 @@ task View { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} + fast: {description: "Sets compression level to 1. 
Set to true by default.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} From 6a78f520a6efee6def3fcc257f5ea3be02daf8cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:21:57 +0200 Subject: [PATCH 391/439] Increase deep variant shards and explain memory usage --- deepvariant.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index c700416f..b0ed2a19 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -30,13 +30,17 @@ task RunDeepVariant { String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int numShards = 4 + Int numShards = 8 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean VCFStatsReport = true + # Most of the memory used is at the end, in the step where the variants + # are merged. This is a single-threaded high memory step. The number + # of shards does not influence the memory so much. + # The provided memory here is enough to merge human chromosome 1. String memory = "48GiB" Int timeMinutes = 5000 # Version 1.8.0 has a bug. From ba35d987ca3fe3c27a01034d60cd2ab09369ab31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:36:03 +0200 Subject: [PATCH 392/439] Set a lower number of threads for samtools merge to decrease waste --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cd24e6e9..cb8dbd55 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,8 +436,9 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time.
- Int threads = 8 + # Merging is often a bottleneck. With compression level 1 however, + # more than three threads does not add more benefit. + Int threads = 3 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 8b41a7feddf6e1f29af7fd825cad6a0ae6811687 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:45:14 +0200 Subject: [PATCH 393/439] Dynamically set samtools merge threads --- samtools.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cb8dbd55..7dd9ecc1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,9 +436,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. With compression level 1 however, - # more than three threads does not add more benefit. - Int threads = 3 + # Use one thread per input + one for the output + one for merging + Int threads = length(bamFiles) + 2 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 17cf284d2c54212b29cdf4e6a347adc0e0a0c458 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:38:42 +0200 Subject: [PATCH 394/439] Also use threads for faster indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7dd9ecc1..811f56e0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,7 +456,7 @@ task Merge { ~{true="-c" false="" combineRGHeaders} \ ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + samtools index -@ ~{threads - 1} ~{outputBamPath} ~{indexPath} } output { From 1fae30492bdff1af750ac963d565cbb16cc6572b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:53:59 +0200 Subject: [PATCH 395/439] Add missing 
parameter_meta --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 811f56e0..743fce0c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -480,6 +480,8 @@ task Merge { combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d485e17399c3482aa109e0d1055c2b2bac9d93a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 08:30:54 +0200 Subject: [PATCH 396/439] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1180578a..96adc8fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- -+ Samtools merge default thread count increased to 8. ++ MultiQC image updated to version 1.28 ++ Samtools merge now has options added for merging RG and PG headers. ++ Samtools merge default thread count increased based on the number of files. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. 
From 847ad71a26b3a1ddc1fc06c2fda349fc620ad2b5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 19:40:40 +0200 Subject: [PATCH 397/439] Update vt to allow a filter expression and compressed indexed output --- CHANGELOG.md | 1 + vt.wdl | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96adc8fa..dfa40b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. + Samtools merge default thread count increased based on the number of files. diff --git a/vt.wdl b/vt.wdl index 4da2d8cd..4ced1d2a 100644 --- a/vt.wdl +++ b/vt.wdl @@ -27,27 +27,39 @@ task Normalize { File referenceFasta File referenceFastaFai Boolean ignoreMaskedRef = false - String outputPath = "./vt/normalized_decomposed.vcf" + String outputPath = "./vt/normalized_decomposed.vcf.gz" + String? 
filterExpression + + Int compressionLevel = 1 String memory = "4GiB" - Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240) + String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12" } command { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} \ + vt view -h \ + ~{"-f " + filterExpression} \ + ~{inputVCF} \ + | vt normalize - \ -r ~{referenceFasta} \ ~{true="-m " false="" ignoreMaskedRef} \ - | vt decompose -s - -o ~{outputPath} + | vt decompose -s - \ + | vt view - \ + -c ~{compressionLevel} \ + -o ~{outputPath} + vt index ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { + cpu: 1 memory: memory time_minutes: timeMinutes docker: dockerImage @@ -61,11 +73,15 @@ task Normalize { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"} + compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Normalized & decomposed VCF file."} + outputVcf: {description: "Normalized and decomposed VCF file."} + outputVcfIndex: {description: "Index for normalized and decomposed VCF file."} } } From 57018dd55c43af0013f48a61e5119128ccd87d3f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 11:28:56 +0200 Subject: [PATCH 398/439] Properly quote vt filter --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 4ced1d2a..635641e9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -41,7 +41,7 @@ task Normalize { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" vt view -h \ - ~{"-f " + filterExpression} \ + ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \ ~{inputVCF} \ | vt normalize - \ -r ~{referenceFasta} \ From e39fe10360989d5074580034a4df030e16d27f4c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:08:55 +0200 Subject: [PATCH 399/439] do not intermingle singletons --- samtools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index d724a692..ef89477d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -157,6 +157,7 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + String? outputReadS Boolean appendReadNumber = false Boolean outputQuality = false @@ -177,8 +178,10 @@ task Fastq { samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ + ~{"-s " + outputReadS} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -192,6 +195,7 @@ task Fastq { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 + File? 
readS = outputReadS } runtime { @@ -207,6 +211,7 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"} appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} From b9319418b7a96a0046b9c034649930ccd5cf4fa9 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:09:16 +0200 Subject: [PATCH 400/439] The caches in containers caused issues --- samtools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index ef89477d..315a00b5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,9 @@ task Split { command { set -e mkdir -p "~{outputPath}/rg/" + + export XDG_CACHE_HOME=$PWD/.cache/ + export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 8ede8b774a0296fe484e9f78e25d5d358828099e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:15:01 +0200 Subject: [PATCH 401/439] add biopets validate fastq --- biopet.wdl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 biopet.wdl diff --git a/biopet.wdl b/biopet.wdl new file mode 100644 index 00000000..ea8a36c8 --- /dev/null +++ b/biopet.wdl @@ -0,0 +1,60 @@ +version 1.0 + +# 
Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task ValidateFastq { + input { + File inputRead1 + File? 
inputRead2 + + String memory = "1GiB" + Int timeMinutes = 5 + ceil(size(inputRead1, "GiB")) + String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--hdfd78af_3" + } + + command { + set -e + java -jar /usr/local/share/biopet-validatefastq-0.1.1-3/validatefastq-assembly-0.1.1.jar \ + --fastq1 ~{inputRead1} \ + ~{"--fastq2 " + inputRead2} + } + + output { + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputRead1: {description: "The location of the first FASTQ file (first reads for pairs, in case of paired-end sequencing).", category: "required"} + inputRead2: {description: "The location of the paired end reads.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 8493c77e477c5522b0947948b47e35be04974fc7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:19:23 +0200 Subject: [PATCH 402/439] require being explicit about locations --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 315a00b5..5bb2fb82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -177,7 +177,6 @@ task Fastq { mkdir -p "$(dirname ~{outputRead1})" samtools collate -u -O ~{inputBam} | \ samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ From 347ed91d4bff4306cea0074ca7f1c7fa2ff517b7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:20:09 +0200 Subject: [PATCH 403/439] Probably unnecessary --- samtools.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5bb2fb82..0ef1419c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,8 +571,6 @@ task Split { set -e mkdir -p "~{outputPath}/rg/" - export XDG_CACHE_HOME=$PWD/.cache/ - export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 7ff2ac2c1ebab33a3872297beb189e648eb90724 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:22:10 +0200 Subject: [PATCH 404/439] documentation --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..f20dc82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
++ Add `biopet.ValidateFastq` to check your fastq files for pairing and other correctness. ++ **Breaking**: `samtools.Fastq` now requires defining your singleton read location. This only affects you if you were previously using this task with only a single output read file. version 5.2.0 --------------------------- From 1ee07a657fd46f6dc227573c2c59d7ef4d0cd4b9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 10:49:41 +0200 Subject: [PATCH 405/439] add -no-upstream to snpeff task --- snpeff.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 0f14e5b5..8718e01b 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -32,6 +32,7 @@ task SnpEff { Boolean hgvs = true Boolean lof = true Boolean noDownstream = false + Boolean noUpstream = false Boolean noIntergenic = false Boolean noShiftHgvs = false Int? upDownStreamLen @@ -39,7 +40,7 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" } command { @@ -55,6 +56,7 @@ task SnpEff { ~{true="-hgvs" false="-noHgvs" hgvs} \ ~{true="-lof" false="-noLof" lof} \ ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-upstream" false="" noUpstream} \ ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ @@ -82,6 +84,7 @@ task SnpEff { hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"} noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} noShiftHgvs: {description: 
"Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} From d9d989e07649ac3177f6464100e192418e716ce3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:22:36 +0200 Subject: [PATCH 406/439] Add snpsift filter --- CHANGELOG.md | 2 ++ snpsift.wdl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 snpsift.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa40b75..7e209f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/snpsift.wdl b/snpsift.wdl new file mode 100644 index 00000000..0bb413f6 --- /dev/null +++ b/snpsift.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Filter { + input { + File vcf + File? vcfIndex + String filterExpression + String outputPath = "./snpsift_filter.vcf" + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + } + + command { + SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + filter \ + "~{filterExpression}" \ + ~{vcf} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes # !UnknownRuntimeKey + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to filter.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "common"} + filterExpression: {description: "The SnpSift filtering expression.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d4eb18d70d68e5c75539c272bf0db065e5f0bf71 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:35:56 +0200 Subject: [PATCH 407/439] add region input to bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 5 ++++- snpsift.wdl | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e209f1a..f13ab24b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. diff --git a/bcftools.wdl b/bcftools.wdl index 7df8911d..11864a00 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? include + String? 
region Array[String] samples = [] String memory = "256MiB" @@ -368,7 +369,8 @@ task View { ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ - ~{inputFile} + ~{inputFile} \ + ~{region} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -390,6 +392,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + region: {description: "The region to retrieve from the VCF file.", category: "common"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/snpsift.wdl b/snpsift.wdl index 0bb413f6..5bac6484 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -36,6 +36,8 @@ task Filter { } command { + set -e + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ "~{filterExpression}" \ From ecd2242e9a71f352a6b11683a969f9f5804cb18d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 12:14:13 +0200 Subject: [PATCH 408/439] add an input for an index file in bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f13ab24b..7d5ad41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task with an input for an index file. 
+ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. diff --git a/bcftools.wdl b/bcftools.wdl index 11864a00..b923781c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -344,6 +344,7 @@ task Stats { task View { input { File inputFile + File? inputFileIndex String outputPath = "output.vcf" Boolean excludeUncalled = false @@ -389,6 +390,7 @@ task View { parameter_meta { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "the index for the input file.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From abcddcda79a0821ef86bb0d1b40f2e5b7264e829 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 11:56:23 +0200 Subject: [PATCH 409/439] fix wdlTools parsing issue in bcftools annotate --- CHANGELOG.md | 2 ++ bcftools.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5ad41d..7ad69a3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. 
diff --git a/bcftools.wdl b/bcftools.wdl index b923781c..6200a1a1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -111,7 +111,7 @@ task Annotate { collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} - newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} From 748fe367e1964e5014cdb60a3def6976f2846d3c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 14:51:52 +0200 Subject: [PATCH 410/439] change name of snpsift task --- snpsift.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpsift.wdl b/snpsift.wdl index 5bac6484..6b6a1feb 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Filter { +task SnpSiftFilter { input { File vcf File? 
vcfIndex From 2fc90c9790b41781ca35144e0d495f293a614382 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 16:05:27 +0200 Subject: [PATCH 411/439] add a useless ls to check if a dnanexus error is caused by lazy loading --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 6200a1a1..5ab04c1c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,6 +362,8 @@ task View { command { set -e + ls ~{inputFileIndex} + mkdir -p "$(dirname ~{outputPath})" bcftools view \ ~{"--exclude " + exclude} \ From 82a5715109d7c352c016d2672cea27b0ab4eb7f0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 28 May 2025 09:14:43 +0200 Subject: [PATCH 412/439] add ls to snpeff, bcftools view and snpsift so I can see the paths when run on dnanexus --- bcftools.wdl | 2 +- snpeff.wdl | 1 + snpsift.wdl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5ab04c1c..0381d4cf 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,7 +362,7 @@ task View { command { set -e - ls ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index 8718e01b..924db8db 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,6 +45,7 @@ task SnpEff { command { set -e + ls ~{vcf} ~{vcfIndex} mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ diff --git a/snpsift.wdl b/snpsift.wdl index 6b6a1feb..5daacd36 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -37,6 +37,8 @@ task SnpSiftFilter { command { set -e + ls ~{vcf} ~{vcfIndex} + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ From 0513965516fab2b2a6a4c9d146813e65ffa77b19 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 14:04:40 +0200 Subject: [PATCH 413/439] Update modkit.wdl --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl 
index 7546458a..424ba755 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 58b52865e986970b7c49d10096afbf1d0eec8e84 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 15:37:47 +0200 Subject: [PATCH 414/439] Update modkit.wdl More reasonable bounds --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 424ba755..094f0041 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 5d4f097ad010fb12c4b7599511eaafc741b64932 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 16:01:43 +0200 Subject: [PATCH 415/439] re-correct it. --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 094f0041..4aecb517 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). 
Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From eafceb0f98e68feb884f8a947c15c29a2e52eb5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 3 Jun 2025 15:52:08 +0200 Subject: [PATCH 416/439] WIP add option to output compressed VCF files to snpeff and snpsift --- snpeff.wdl | 10 ++++++++-- snpsift.wdl | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/snpeff.wdl b/snpeff.wdl index 924db8db..e1b520af 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -40,9 +40,12 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" + # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1 + String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} @@ -61,12 +64,15 @@ task SnpEff { ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} rm -r $PWD/data } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { diff --git a/snpsift.wdl b/snpsift.wdl index 5daacd36..d964c255 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -44,11 +44,14 @@ task SnpSiftFilter { filter \ "~{filterExpression}" \ ~{vcf} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} } output { File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { From 16656ff77fa9f88577298fd7e8cc00c5eba02004 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Jun 2025 11:38:01 +0200 Subject: [PATCH 417/439] update changelog, fix missing variable --- CHANGELOG.md | 1 + snpsift.wdl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad69a3e..7de262af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. diff --git a/snpsift.wdl b/snpsift.wdl index d964c255..4c354f48 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -32,9 +32,12 @@ task SnpSiftFilter { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22 + String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} From e9189a7f5d61a46d1deec0108900a11d70630933 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 6 Jun 2025 10:55:53 +0200 Subject: [PATCH 418/439] missing trailing slash breaks samtools flagstat --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 743fce0c..ac2e868a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -296,9 +296,10 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" + samtools flagstat \ - --threads ~{threads - 1} - ~{inputBam} > ~{outputPath} + --threads 
~{threads - 1} \ + ~{inputBam} > ~{outputPath} } output { From 69a9c0a6751f78cfaa75c325fc49425113e268b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 11:56:44 +0200 Subject: [PATCH 419/439] Add a task for bcftools norm --- CHANGELOG.md | 1 + bcftools.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7de262af..003aa97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add a task for bcftools norm. + Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. diff --git a/bcftools.wdl b/bcftools.wdl index 0381d4cf..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -180,6 +180,67 @@ task Filter { } } +task Norm { + input { + File inputFile + File? inputFileIndex + String outputPath = "output.vcf.gz" + + File? fasta + String? regions + Boolean splitMultiallelicSites = false + + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + + command { + set -e + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + + mkdir -p "$(dirname ~{outputPath})" + bcftools norm \ + -o ~{outputPath} \ + -O ~{true="z" false="v" compressed} \ + ~{"--regions " + regions} \ + ~{"--fasta " + fasta} \ + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + + ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} + } + + output { + File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + fasta: {description: "Equivalent to bcftools norm's `--fasta` option.", category: "advanced"} + regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"} + splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} + } +} + task Sort { input { File inputFile From 5d4f5a7fa3846dea7b8a16fce9c47d8674a5f260 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:26:56 +0200 Subject: [PATCH 420/439] more time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..5fb06016 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c97c55a47411b2395289ed3bf0357d8686dc7350 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:38:14 +0200 Subject: [PATCH 421/439] more time for bcftools Norm --- 
bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5fb06016..56564b17 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 + Int timeMinutes = 5 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 88ac2526f86f5a89d6de0fe74077f6bab05baf8d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:52:37 +0200 Subject: [PATCH 422/439] reset time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 56564b17..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3c8ec631930d4ec7df1d01ba802d5943257dfd42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:58:40 +0200 Subject: [PATCH 423/439] fix bcftools norm --- bcftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..ae2dee4e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -207,7 +207,8 @@ task Norm { -O ~{true="z" false="v" compressed} \ ~{"--regions " + regions} \ ~{"--fasta " + fasta} \ - ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \ + ~{inputFile} ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} } From 5dab6c7b08f05e831ea110b44acdefedc298f67b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 13:36:41 +0200 Subject: [PATCH 424/439] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index ae2dee4e..b48956cc 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "2GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99c562c5e8ed51e8a2a04ec5dc72dada5248ff9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 15:08:05 +0200 Subject: [PATCH 425/439] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index b48956cc..7e297bc7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "10GiB" + String memory = "64GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99b9aca2b67d6a5d138c0b4dc9317f6b03bbc395 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 11 Jun 2025 14:24:53 +0200 Subject: [PATCH 426/439] Disable ai in multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44f71e4b..e0b036ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ version 6.0.0-dev + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. ++ Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index fae52178..18667b91 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -39,6 +39,7 @@ task MultiQC { # This must be actively enabled in my opinion. # The tools default is to upload. Boolean megaQCUpload = false + Boolean enableAi = false Int? 
dirsDepth String? title @@ -124,6 +125,7 @@ task MultiQC { ~{true="--lint" false="" lint} \ ~{true="--pdf" false="" pdf} \ ~{false="--no-megaqc-upload" true="" megaQCUpload} \ + ~{false="--no-ai" true="" enableAi} \ ~{"--config " + config} \ ~{"--cl-config " + clConfig } \ ~{reportDir} @@ -159,6 +161,7 @@ task MultiQC { lint: {description: "Equivalent to MultiQC's `--lint` flag.", category: "advanced"} pdf: {description: "Equivalent to MultiQC's `--pdf` flag.", category: "advanced"} megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} + enableAi: {description: "Opposite to MultiQC's `--no-ai` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} title: {description: "Equivalent to MultiQC's `--title` option.", category: "advanced"} comment: {description: "Equivalent to MultiQC's `--comment` option.", category: "advanced"} From c882527a4c6e3c476a2a1ba15319b30d70f6dc53 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 17 Jun 2025 10:46:19 +0200 Subject: [PATCH 427/439] Support supplying additional reports/config to multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..42542531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. ++ Support providing additional reports to MultiQC in workflow configuration. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index 18667b91..8f05a36e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -53,6 +53,7 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? module + Array[File]+? additionalReports String? dataFormat File? config # A directory String? 
clConfig @@ -79,13 +80,15 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. + Array[File] allReports = flatten([reports, select_all([additionalReports])]) + command { python3 < Date: Wed, 18 Jun 2025 08:38:36 +0200 Subject: [PATCH 428/439] ahmust be flatter --- multiqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc.wdl b/multiqc.wdl index 8f05a36e..db47ac87 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -80,7 +80,7 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. - Array[File] allReports = flatten([reports, select_all([additionalReports])]) + Array[File] allReports = flatten([reports, flatten(select_all([additionalReports]))]) command { python3 < Date: Mon, 7 Jul 2025 11:27:17 +0200 Subject: [PATCH 429/439] bump bedtools sort --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index a5d8aab3..50acd42d 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -209,7 +209,7 @@ task Sort { String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } command { From c8fbb60c214cb3defe74b71aa60df6eefbae594a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 11:28:18 +0200 Subject: [PATCH 430/439] document --- CHANGELOG.md | 1 + bedtools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..920993ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ bedtools.Sort: bumped container version to permit use of `faidx`. + Update vt task to allow a filter expression and compress and index the output. 
+ MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/bedtools.wdl b/bedtools.wdl index 50acd42d..7fcce28f 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -224,7 +224,7 @@ task Sort { ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ ~{"-g " + genome} \ - ~{"-faidx" + faidx} \ + ~{"-faidx " + faidx} \ > ~{outputBed} } From 633d0bc76ffb27f09b957e7eb26153a8f3d5edac Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 12:33:54 +0200 Subject: [PATCH 431/439] Add no name check support, requiring bumping Intersect as well --- bedtools.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 7fcce28f..64fccc7b 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -270,10 +270,11 @@ task Intersect { Boolean writeA = false Boolean writeB = false Boolean stranded = false + Boolean nonamecheck = false String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } Boolean sorted = defined(faidx) @@ -289,6 +290,7 @@ task Intersect { ~{true="-wb" false="" writeB} \ ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ + ~{true="-nonamecheck" false="" nonamecheck} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} } @@ -313,6 +315,7 @@ task Intersect { writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + nonamecheck: {description: "Disable the bedtools intersect name check. This is used to catch chr1 vs chr01 or chr1 vs 1 naming inconsistencies. However, it throws an error for GIAB hg38 which has capital letters. https://github.com/arq5x/bedtools2/issues/648", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c676fe2198b18b437a3e279c240290b7227b94d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 17 Jul 2025 10:37:57 +0200 Subject: [PATCH 432/439] address review comments --- bcftools.wdl | 13 ++++++++----- snpeff.wdl | 7 ++++++- snpsift.wdl | 7 ++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7e297bc7..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,8 +190,9 @@ task Norm { String? 
regions Boolean splitMultiallelicSites = false - String memory = "64GiB" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -199,7 +200,7 @@ task Norm { command { set -e - ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools norm \ @@ -222,6 +223,7 @@ task Norm { memory: memory time_minutes: timeMinutes docker: dockerImage + disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs } parameter_meta { @@ -234,11 +236,12 @@ task Norm { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Sorted VCF file."} - outputVcfIndex: {description: "Index of sorted VCF file."} + outputVcf: {description: "Normalized VCF file."} + outputVcfIndex: {description: "Index of Normalized VCF file."} } } @@ -424,7 +427,7 @@ task View { command { set -e - ls ~{inputFile} ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index e1b520af..b972ab30 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -48,7 +48,7 @@ task SnpEff { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -82,6 +82,7 @@ task SnpEff { } parameter_meta { + # inputs vcf: {description: "A VCF file to analyse.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "required"} genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} @@ -102,5 +103,9 @@ task SnpEff { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of annotated VCF file."} } } diff --git a/snpsift.wdl b/snpsift.wdl index 4c354f48..a62f7295 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -40,7 +40,7 @@ task SnpSiftFilter { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -64,6 +64,7 @@ task SnpSiftFilter { } parameter_meta { + # inputs vcf: {description: "A VCF file to filter.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "common"} filterExpression: {description: "The SnpSift filtering expression.", category: "required"} @@ -75,5 +76,9 @@ task SnpSiftFilter { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Filtered VCF file."} + outputVcfIndex: {description: "Index of filtered VCF file."} } } From 9e9ae08503c7c2e10c0fe16d018bfb2810c4f3de Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 11:36:55 +0200 Subject: [PATCH 433/439] Update clair3 image --- CHANGELOG.md | 1 + clair3.wdl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c56b124a..5cabdece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. 
++ Update clair3 version from 1.0.11 to 1.1.0 version 5.2.0 --------------------------- diff --git a/clair3.wdl b/clair3.wdl index 5a6154af..ae54ef40 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,8 +34,8 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/clair3:1.1.0--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" @@ -91,4 +91,4 @@ task Clair3 { vcfIndex: {description: "Output VCF index."} } -} \ No newline at end of file +} From d648745cfeedbc816081547f9772f0ee2d9f1692 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:13:43 +0200 Subject: [PATCH 434/439] Improve whatshap runtime/memory estimates --- CHANGELOG.md | 1 + whatshap.wdl | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cabdece..cfb8f41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ version 6.0.0-dev + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 ++ Improve whatshap runtime/memory usage for our cluster. version 5.2.0 --------------------------- diff --git a/whatshap.wdl b/whatshap.wdl index da86ad82..beef5e99 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -40,12 +40,19 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 + + String memory = 2 + ceil(size(bam, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e + + mkdir -p $(dirname ~{outputVCF}) + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -110,12 +117,16 @@ task Stats { String? chromosome String memory = "4GiB" - Int timeMinutes = 120 + Int timeMinutes = 30 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { + set -e + + mkdir -p $(dirname ~{tsv}) + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ @@ -169,7 +180,9 @@ task Haplotag { String? regions String? sample - String memory = "4GiB" + String memory = 2 + ceil(size(bam, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -177,6 +190,9 @@ task Haplotag { command { set -e + + mkdir -p $(dirname ~{outputFile}) + whatshap haplotag \ ~{vcf} \ ~{alignments} \ From 7e246b01de31489577c434f69a5adbd2ab7cea2c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:18:54 +0200 Subject: [PATCH 435/439] Add modkit tasks --- CHANGELOG.md | 3 + modkit.wdl | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb8f41d..bf9d9238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,9 @@ version 6.0.0-dev + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. 
++ Add `Modkit.SampleProbs` ++ Add `Modkit.DmrMulti` ++ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 678e326a..a35d8ed2 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -130,7 +130,7 @@ task Summary { Int threads = 4 String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). - Int timeMinutes = 2880 / threads # 2 Days / threads + Int timeMinutes = 60 # originally this was set at "2 Days / threads" but with 4 threads and that much ram, it's pretty fast. String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } @@ -177,3 +177,192 @@ task Summary { summaryReport: {description: "The output modkit summary."} } } + +task SampleProbs { + input { + File bam + File bamIndex + + String summary = "modkit-sample-probs" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? 
seed + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{summary} + + modkit sample-probs \ + --threads ~{threads} \ + --out-dir ~{summary} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + --hist \ + ~{bam} + >>> + + output { + File reportCounts = "~{summary}/counts.html" + File reportProportion = "~{summary}/proportion.html" + File reportProbabilitiesTsv = "~{summary}/probabilities.tsv" + File reportThresholdsTsv = "~{summary}/thresholds.tsv" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} + +task DmrMultiInputPrep { + input { + Array[File] control + Array[File] condition + String controlName + String conditionName + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" + } + + command <<< + cat > modkit_dmr.py <<'CODE' + #!/usr/bin/env python3 + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--control_n', type=str, default='control') + parser.add_argument('--control_f', type=str,nargs='+') + parser.add_argument('--condition_n', type=str, default='condition') + parser.add_argument('--condition_f', type=str,nargs='+') + args = parser.parse_args() + modkit = [] + for i, x in enumerate(args.control_f): + modkit.extend(['-s', x, f'{args.control_n}{i}']) + for i, x in enumerate(args.condition_f): + modkit.extend(['-s', x, f'{args.condition_n}{i}']) + print(' '.join(modkit), end='') + CODE + + python modkit_dmr.py \ + --control_n ~{controlName} \ + --control_f ~{sep=" " control} \ + --condition_n ~{conditionName} \ + --condition_f ~{sep=" " condition} + >>> + + output { + String params = select_first(read_lines(stdout())) + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } +} + + +task DmrMulti { + input { + String dmrMultiArguments + Array[File] control + Array[File] condition + + Array[File] controlIndex + Array[File] conditionIndex + + String controlName + String conditionName + + File referenceFasta + File referenceFastaFai + String dmr_dir = "results" + + File? 
cpg_islands + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 600 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{dmr_dir} + + modkit dmr multi \ + ~{dmrMultiArguments} \ + --out-dir ~{dmr_dir} \ + ~{"--regions-bed " + cpg_islands} \ + --ref ~{referenceFasta} \ + --base C \ + --threads ~{threads} \ + --header \ + --log-filepath dmr_multi.log + >>> + + output { + # TODO: other files + File log = "dmr_multi.log" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From 8564f8c0a757cecd00155de011e10a1e51ab32cf Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 436/439] I don't feel like documenting it if it isn't used --- CHANGELOG.md | 2 - modkit.wdl | 122 --------------------------------------------------- 2 files changed, 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf9d9238..bb09f4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,8 +57,6 @@ version 6.0.0-dev + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. + Add `Modkit.SampleProbs` -+ Add `Modkit.DmrMulti` -+ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a35d8ed2..b38929f5 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -244,125 +244,3 @@ task SampleProbs { summaryReport: {description: "The output modkit summary."} } } - -task DmrMultiInputPrep { - input { - Array[File] control - Array[File] condition - String controlName - String conditionName - - Int threads = 1 - String memory = "1G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" - } - - command <<< - cat > modkit_dmr.py <<'CODE' - #!/usr/bin/env python3 - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--control_n', type=str, default='control') - parser.add_argument('--control_f', type=str,nargs='+') - parser.add_argument('--condition_n', type=str, default='condition') - parser.add_argument('--condition_f', type=str,nargs='+') - args = parser.parse_args() - modkit = [] - for i, x in enumerate(args.control_f): - modkit.extend(['-s', x, f'{args.control_n}{i}']) - for i, x in enumerate(args.condition_f): - 
modkit.extend(['-s', x, f'{args.condition_n}{i}']) - print(' '.join(modkit), end='') - CODE - - python modkit_dmr.py \ - --control_n ~{controlName} \ - --control_f ~{sep=" " control} \ - --condition_n ~{conditionName} \ - --condition_f ~{sep=" " condition} - >>> - - output { - String params = select_first(read_lines(stdout())) - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } -} - - -task DmrMulti { - input { - String dmrMultiArguments - Array[File] control - Array[File] condition - - Array[File] controlIndex - Array[File] conditionIndex - - String controlName - String conditionName - - File referenceFasta - File referenceFastaFai - String dmr_dir = "results" - - File? cpg_islands - - Int threads = 4 - String memory = "32G" - Int timeMinutes = 600 - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" - } - - command <<< - set -e - mkdir -p ~{dmr_dir} - - modkit dmr multi \ - ~{dmrMultiArguments} \ - --out-dir ~{dmr_dir} \ - ~{"--regions-bed " + cpg_islands} \ - --ref ~{referenceFasta} \ - --base C \ - --threads ~{threads} \ - --header \ - --log-filepath dmr_multi.log - >>> - - output { - # TODO: other files - File log = "dmr_multi.log" - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } - - parameter_meta { - # input - bam: {description: "The input alignment file", category: "required"} - bamIndex: {description: "The index for the input alignment file", category: "required"} - - sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} - numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} - samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} - seed: {description: "A seed can be provided for 
reproducibility in the sampling fraction case.", category: "advanced"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # output - summaryReport: {description: "The output modkit summary."} - } -} From 040a43e6723d672f97f126a1628e773d75fb6515 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 437/439] I don't feel like documenting it if it isn't used --- modkit.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index b38929f5..ddf4dbf7 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -229,6 +229,7 @@ task SampleProbs { # input bam: {description: "The input alignment file", category: "required"} bamIndex: {description: "The index for the input alignment file", category: "required"} + summary: {description: "A folder for the outputs", category: "required"} sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} @@ -241,6 +242,9 @@ task SampleProbs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - summaryReport: {description: "The output modkit summary."} + reportCounts: {description: "The output html report of counts"} + reportProportion: {description: "The output html report of proportions"} + reportProbabilitiesTsv: {description: "The output TSV of Probabilities"} + reportThresholdsTsv: {description: "The output TSV of thresholds"} } } From bd54aeb0bcdd67db3fb180b890954bca92000287 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 13:13:43 +0200 Subject: [PATCH 438/439] incorrect inputs --- whatshap.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index beef5e99..3b2bd1d3 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -41,8 +41,8 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 - String memory = 2 + ceil(size(bam, "G") / 20 ) - Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + String memory = 2 + ceil(size(phaseInput, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -180,8 +180,8 @@ task Haplotag { String? regions String? sample - String memory = 2 + ceil(size(bam, "G") / 50 ) - Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + String memory = 2 + ceil(size(alignments, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. 
From e7061594546ceac5e7bbcdc48877bc78b5ec795c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 15:46:06 +0200 Subject: [PATCH 439/439] Fix duplicate declarations --- whatshap.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 3b2bd1d3..b491f566 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,9 +38,6 @@ task Phase { String? threshold String? ped - String memory = "4GiB" - Int timeMinutes = 120 - String memory = 2 + ceil(size(phaseInput, "G") / 20 ) Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) @@ -183,7 +180,6 @@ task Haplotag { String memory = 2 + ceil(size(alignments, "G") / 50 ) Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) - Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" }